Spaces: Crystal Alchemist (running on CPU Upgrade)
#657 by crystalai - opened
This view is limited to 50 files because it contains too many changes.
See the raw diff here.
- .env.example +0 -3
- .gitattributes +35 -0
- .gitignore +10 -43
- .pre-commit-config.yaml +53 -0
- Dockerfile +0 -62
- Makefile +13 -0
- README.md +13 -75
- app.py +448 -0
- backend/Dockerfile.dev +0 -25
- backend/README.md +0 -352
- backend/__init__.py +0 -0
- backend/app/api/__init__.py +0 -5
- backend/app/api/dependencies.py +0 -34
- backend/app/api/endpoints/leaderboard.py +0 -49
- backend/app/api/endpoints/models.py +0 -103
- backend/app/api/endpoints/votes.py +0 -105
- backend/app/api/router.py +0 -9
- backend/app/asgi.py +0 -106
- backend/app/config/__init__.py +0 -6
- backend/app/config/base.py +0 -38
- backend/app/config/hf_config.py +0 -30
- backend/app/config/logging_config.py +0 -38
- backend/app/core/cache.py +0 -109
- backend/app/core/fastapi_cache.py +0 -48
- backend/app/core/formatting.py +0 -104
- backend/app/main.py +0 -18
- backend/app/services/__init__.py +0 -3
- backend/app/services/hf_service.py +0 -50
- backend/app/services/leaderboard.py +0 -208
- backend/app/services/models.py +0 -587
- backend/app/services/rate_limiter.py +0 -72
- backend/app/services/votes.py +0 -390
- backend/app/utils/__init__.py +0 -3
- backend/app/utils/logging.py +0 -3
- backend/app/utils/model_validation.py +0 -266
- backend/pyproject.toml +0 -31
- backend/utils/analyze_prod_datasets.py +0 -170
- backend/utils/analyze_prod_models.py +0 -106
- backend/utils/fix_wrong_model_size.py +0 -110
- backend/utils/last_activity.py +0 -164
- backend/utils/sync_datasets_locally.py +0 -130
- backend/uv.lock +0 -0
- docker-compose.yml +0 -33
- frontend/Dockerfile.dev +0 -15
- frontend/README.md +0 -80
- frontend/package.json +0 -55
- frontend/public/index.html +0 -96
- frontend/public/logo256.png +0 -0
- frontend/public/logo32.png +0 -0
- frontend/public/og-image.jpg +0 -0
.env.example
DELETED
@@ -1,3 +0,0 @@
-ENVIRONMENT=development
-HF_TOKEN=xxx
-HF_HOME=.cache
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
.gitignore
CHANGED
@@ -1,45 +1,12 @@
-
-
-__pycache__
-.cache/
-
-# dependencies
-
-frontend/node_modules
-/.pnp
-.pnp.js
-
-# testing
-
-/coverage
-
-# production
-
-/build
-
-# misc
-
-.DS_Store
-.env.local
-.env.development.local
-.env.test.local
-.env.production.local
-
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-
-src/dataframe.json
-
-yarn.lock
-package-lock.json
-
-/public
-
-.claudesync/
-
-# Environment variables
+venv/
+__pycache__/
 .env
-.
-
+.ipynb_checkpoints
+*ipynb
+.vscode/
+
+eval-queue/
+eval-results/
+dynamic-info/
 
+src/assets/model_counts.html
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,53 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+default_language_version:
+  python: python3
+
+ci:
+  autofix_prs: true
+  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+  autoupdate_schedule: quarterly
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: check-yaml
+      - id: check-case-conflict
+      - id: detect-private-key
+      - id: check-added-large-files
+        args: ['--maxkb=1000']
+      - id: requirements-txt-fixer
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: Format imports
+
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+      - id: black
+        name: Format code
+        additional_dependencies: ['click==8.0.2']
+
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    # Ruff version.
+    rev: 'v0.0.267'
+    hooks:
+      - id: ruff
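
This is the stock pre-commit setup: once the file is committed, running `pre-commit install` registers the hooks locally, and the `ci:` block tells pre-commit.ci to auto-fix pull requests and to propose hook updates quarterly.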
Dockerfile
DELETED
@@ -1,62 +0,0 @@
-# Build frontend
-FROM node:18 as frontend-build
-WORKDIR /app
-COPY frontend/package*.json ./
-RUN npm install
-COPY frontend/ ./
-
-RUN npm run build
-
-# Build backend
-FROM python:3.12-slim
-WORKDIR /app
-
-# Create non-root user
-RUN useradd -m -u 1000 user
-
-# Install poetry
-RUN pip install poetry
-
-# Create and configure cache directory
-RUN mkdir -p /app/.cache && \
-    chown -R user:user /app
-
-# Copy and install backend dependencies
-COPY backend/pyproject.toml backend/poetry.lock* ./
-RUN poetry config virtualenvs.create false \
-    && poetry install --no-interaction --no-ansi --no-root --only main
-
-# Copy backend code
-COPY backend/ .
-
-# Install Node.js and npm
-RUN apt-get update && apt-get install -y \
-    curl \
-    netcat-openbsd \
-    && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
-    && apt-get install -y nodejs \
-    && rm -rf /var/lib/apt/lists/*
-
-# Copy frontend server and build
-COPY --from=frontend-build /app/build ./frontend/build
-COPY --from=frontend-build /app/package*.json ./frontend/
-COPY --from=frontend-build /app/server.js ./frontend/
-
-# Install frontend production dependencies
-WORKDIR /app/frontend
-RUN npm install --production
-WORKDIR /app
-
-# Environment variables
-ENV HF_HOME=/app/.cache \
-    HF_DATASETS_CACHE=/app/.cache \
-    INTERNAL_API_PORT=7861 \
-    PORT=7860 \
-    NODE_ENV=production
-
-# Note: HF_TOKEN should be provided at runtime, not build time
-USER user
-EXPOSE 7860
-
-# Start both servers with wait-for
-CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
Makefile
ADDED
@@ -0,0 +1,13 @@
+.PHONY: style format
+
+
+style:
+	python -m black --line-length 119 .
+	python -m isort .
+	ruff check --fix .
+
+
+quality:
+	python -m black --check --line-length 119 .
+	python -m isort --check-only .
+	ruff check .
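
With this Makefile, `make style` rewrites the tree in place (black at a 119-character line length, isort, and `ruff check --fix`), while `make quality` runs the same three tools in check-only mode, so a CI job can fail without modifying files.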
README.md
CHANGED
@@ -1,85 +1,23 @@
 ---
 title: Open LLM Leaderboard
 emoji: 🏆
-colorFrom:
-colorTo:
-sdk:
-
+colorFrom: green
+colorTo: indigo
+sdk: gradio
+sdk_version: 4.9.0
+app_file: app.py
 pinned: true
 license: apache-2.0
-duplicated_from:
+duplicated_from: HuggingFaceH4/open_llm_leaderboard
+fullWidth: true
+space_ci:
+  private: true
+  secrets:
+    - HF_TOKEN
+    - H4_TOKEN
 tags:
 - leaderboard
 short_description: Track, rank and evaluate open LLMs and chatbots
 ---
 
-
-
-Modern React interface for comparing Large Language Models (LLMs) in an open and reproducible way.
-
-## Features
-
-- 📊 Interactive table with advanced sorting and filtering
-- 🔍 Semantic model search
-- 📌 Pin models for comparison
-- 📱 Responsive and modern interface
-- 🎨 Dark/Light mode
-- ⚡️ Optimized performance with virtualization
-
-## Architecture
-
-The project is split into two main parts:
-
-### Frontend (React)
-
-```
-frontend/
-├── src/
-│   ├── components/  # Reusable UI components
-│   ├── pages/       # Application pages
-│   ├── hooks/       # Custom React hooks
-│   ├── context/     # React contexts
-│   └── constants/   # Constants and configurations
-├── public/          # Static assets
-└── server.js        # Express server for production
-```
-
-### Backend (FastAPI)
-
-```
-backend/
-├── app/
-│   ├── api/             # API router and endpoints
-│   │   └── endpoints/   # Specific API endpoints
-│   ├── core/            # Core functionality
-│   ├── config/          # Configuration
-│   └── services/        # Business logic services
-│       ├── leaderboard.py
-│       ├── models.py
-│       ├── votes.py
-│       └── hf_service.py
-└── utils/               # Utility functions
-```
-
-## Technologies
-
-### Frontend
-
-- React
-- Material-UI
-- TanStack Table & Virtual
-- Express.js
-
-### Backend
-
-- FastAPI
-- Hugging Face API
-- Docker
-
-## Development
-
-The application is containerized using Docker and can be run using:
-
-```bash
-docker-compose up
-```
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
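
The new front matter converts the Space into a Gradio app (`sdk: gradio`, `app_file: app.py`), and the `space_ci` block with its `HF_TOKEN`/`H4_TOKEN` secrets is the configuration consumed by the `enable_space_ci()` call imported (currently commented out) in `app.py` below, which starts ephemeral Spaces on PRs.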
app.py
ADDED
@@ -0,0 +1,448 @@
+import gradio as gr
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
+from gradio_space_ci import enable_space_ci
+
+from src.display.about import (
+    CITATION_BUTTON_LABEL,
+    CITATION_BUTTON_TEXT,
+    EVALUATION_QUEUE_TEXT,
+    INTRODUCTION_TEXT,
+    LLM_BENCHMARKS_TEXT,
+    FAQ_TEXT,
+    TITLE,
+)
+from src.display.css_html_js import custom_css
+from src.display.utils import (
+    BENCHMARK_COLS,
+    COLS,
+    EVAL_COLS,
+    EVAL_TYPES,
+    NUMERIC_INTERVALS,
+    TYPES,
+    AutoEvalColumn,
+    ModelType,
+    fields,
+    WeightType,
+    Precision
+)
+from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+from src.scripts.update_all_request_files import update_dynamic_files
+from src.tools.collections import update_collections
+from src.tools.plots import (
+    create_metric_plot_obj,
+    create_plot_df,
+    create_scores_df,
+)
+
+# Start ephemeral Spaces on PRs (see config in README.md)
+#enable_space_ci()
+
+def restart_space():
+    API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
+
+
+def init_space(full_init: bool = True):
+    if full_init:
+        try:
+            print(EVAL_REQUESTS_PATH)
+            snapshot_download(
+                repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+            )
+        except Exception:
+            restart_space()
+        try:
+            print(DYNAMIC_INFO_PATH)
+            snapshot_download(
+                repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+            )
+        except Exception:
+            restart_space()
+        try:
+            print(EVAL_RESULTS_PATH)
+            snapshot_download(
+                repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+            )
+        except Exception:
+            restart_space()
+
+
+    raw_data, original_df = get_leaderboard_df(
+        results_path=EVAL_RESULTS_PATH,
+        requests_path=EVAL_REQUESTS_PATH,
+        dynamic_path=DYNAMIC_INFO_FILE_PATH,
+        cols=COLS,
+        benchmark_cols=BENCHMARK_COLS
+    )
+    update_collections(original_df.copy())
+    leaderboard_df = original_df.copy()
+
+    plot_df = create_plot_df(create_scores_df(raw_data))
+
+    (
+        finished_eval_queue_df,
+        running_eval_queue_df,
+        pending_eval_queue_df,
+    ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+
+    return leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+
+leaderboard_df, original_df, plot_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()
+
+
+# Searching and filtering
+def update_table(
+    hidden_df: pd.DataFrame,
+    columns: list,
+    type_query: list,
+    precision_query: str,
+    size_query: list,
+    hide_models: list,
+    query: str,
+):
+    filtered_df = filter_models(df=hidden_df, type_query=type_query, size_query=size_query, precision_query=precision_query, hide_models=hide_models)
+    filtered_df = filter_queries(query, filtered_df)
+    df = select_columns(filtered_df, columns)
+    return df
+
+
+def load_query(request: gr.Request):  # triggered only once at startup => read query parameter if it exists
+    query = request.query_params.get("query") or ""
+    return query, query  # return one for the "search_bar", one for a hidden component that triggers a reload only if value has changed
+
+
+def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
+    return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
+
+
+def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
+    always_here_cols = [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+    dummy_col = [AutoEvalColumn.dummy.name]
+    #AutoEvalColumn.model_type_symbol.name,
+    #AutoEvalColumn.model.name,
+    # We use COLS to maintain sorting
+    filtered_df = df[
+        always_here_cols + [c for c in COLS if c in df.columns and c in columns] + dummy_col
+    ]
+    return filtered_df
+
+
+def filter_queries(query: str, filtered_df: pd.DataFrame):
+    """Added by Abishek"""
+    final_df = []
+    if query != "":
+        queries = [q.strip() for q in query.split(";")]
+        for _q in queries:
+            _q = _q.strip()
+            if _q != "":
+                temp_filtered_df = search_table(filtered_df, _q)
+                if len(temp_filtered_df) > 0:
+                    final_df.append(temp_filtered_df)
+        if len(final_df) > 0:
+            filtered_df = pd.concat(final_df)
+            filtered_df = filtered_df.drop_duplicates(
+                subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
+            )
+
+    return filtered_df
+
+
+def filter_models(
+    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list
+) -> pd.DataFrame:
+    # Show all models
+    if "Private or deleted" in hide_models:
+        filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
+    else:
+        filtered_df = df
+
+    if "Contains a merge/moerge" in hide_models:
+        filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
+
+    if "MoE" in hide_models:
+        filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
+
+    if "Flagged" in hide_models:
+        filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
+
+    type_emoji = [t[0] for t in type_query]
+    filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
+    filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
+
+    numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
+    params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
+    mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
+    filtered_df = filtered_df.loc[mask]
+
+    return filtered_df
+
+leaderboard_df = filter_models(
+    df=leaderboard_df,
+    type_query=[t.to_str(" : ") for t in ModelType],
+    size_query=list(NUMERIC_INTERVALS.keys()),
+    precision_query=[i.value.name for i in Precision],
+    hide_models=["Private or deleted", "Contains a merge/moerge", "Flagged"],  # Deleted, merges, flagged, MoEs
+)
+
+demo = gr.Blocks(css=custom_css)
+with demo:
+    gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        search_bar = gr.Textbox(
+                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+                            show_label=False,
+                            elem_id="search-bar",
+                        )
+                    with gr.Row():
+                        shown_columns = gr.CheckboxGroup(
+                            choices=[
+                                c.name
+                                for c in fields(AutoEvalColumn)
+                                if not c.hidden and not c.never_hidden and not c.dummy
+                            ],
+                            value=[
+                                c.name
+                                for c in fields(AutoEvalColumn)
+                                if c.displayed_by_default and not c.hidden and not c.never_hidden
+                            ],
+                            label="Select columns to show",
+                            elem_id="column-select",
+                            interactive=True,
+                        )
+                    with gr.Row():
+                        hide_models = gr.CheckboxGroup(
+                            label="Hide models",
+                            choices = ["Private or deleted", "Contains a merge/moerge", "Flagged", "MoE"],
+                            value=["Private or deleted", "Contains a merge/moerge", "Flagged"],
+                            interactive=True
+                        )
+                with gr.Column(min_width=320):
+                    #with gr.Box(elem_id="box-filter"):
+                    filter_columns_type = gr.CheckboxGroup(
+                        label="Model types",
+                        choices=[t.to_str() for t in ModelType],
+                        value=[t.to_str() for t in ModelType],
+                        interactive=True,
+                        elem_id="filter-columns-type",
+                    )
+                    filter_columns_precision = gr.CheckboxGroup(
+                        label="Precision",
+                        choices=[i.value.name for i in Precision],
+                        value=[i.value.name for i in Precision],
+                        interactive=True,
+                        elem_id="filter-columns-precision",
+                    )
+                    filter_columns_size = gr.CheckboxGroup(
+                        label="Model sizes (in billions of parameters)",
+                        choices=list(NUMERIC_INTERVALS.keys()),
+                        value=list(NUMERIC_INTERVALS.keys()),
+                        interactive=True,
+                        elem_id="filter-columns-size",
+                    )
+
+            leaderboard_table = gr.components.Dataframe(
+                value=leaderboard_df[
+                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+                    + shown_columns.value
+                    + [AutoEvalColumn.dummy.name]
+                ],
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                datatype=TYPES,
+                elem_id="leaderboard-table",
+                interactive=False,
+                visible=True,
+                #column_widths=["2%", "33%"]
+            )
+
+            # Dummy leaderboard for handling the case when the user uses backspace key
+            hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                value=original_df[COLS],
+                headers=COLS,
+                datatype=TYPES,
+                visible=False,
+            )
+            search_bar.submit(
+                update_table,
+                [
+                    hidden_leaderboard_table_for_search,
+                    shown_columns,
+                    filter_columns_type,
+                    filter_columns_precision,
+                    filter_columns_size,
+                    hide_models,
+                    search_bar,
+                ],
+                leaderboard_table,
+            )
+
+            # Define a hidden component that will trigger a reload only if a query parameter has been set
+            hidden_search_bar = gr.Textbox(value="", visible=False)
+            hidden_search_bar.change(
+                update_table,
+                [
+                    hidden_leaderboard_table_for_search,
+                    shown_columns,
+                    filter_columns_type,
+                    filter_columns_precision,
+                    filter_columns_size,
+                    hide_models,
+                    search_bar,
+                ],
+                leaderboard_table,
+            )
+            # Check query parameter once at startup and update search bar + hidden component
+            demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
+
+            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, hide_models]:
+                selector.change(
+                    update_table,
+                    [
+                        hidden_leaderboard_table_for_search,
+                        shown_columns,
+                        filter_columns_type,
+                        filter_columns_precision,
+                        filter_columns_size,
+                        hide_models,
+                        search_bar,
+                    ],
+                    leaderboard_table,
+                    queue=True,
+                )
+
+        with gr.TabItem("📈 Metrics through time", elem_id="llm-benchmark-tab-table", id=2):
+            with gr.Row():
+                with gr.Column():
+                    chart = create_metric_plot_obj(
+                        plot_df,
+                        [AutoEvalColumn.average.name],
+                        title="Average of Top Scores and Human Baseline Over Time (from last update)",
+                    )
+                    gr.Plot(value=chart, min_width=500)
+                with gr.Column():
+                    chart = create_metric_plot_obj(
+                        plot_df,
+                        BENCHMARK_COLS,
+                        title="Top Scores and Human Baseline Over Time (from last update)",
+                    )
+                    gr.Plot(value=chart, min_width=500)
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=4):
+            gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
+
+        with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+                with gr.Row():
+                    gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+                with gr.Row():
+                    with gr.Column():
+                        model_name_textbox = gr.Textbox(label="Model name")
+                        revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                        private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
+                        model_type = gr.Dropdown(
+                            choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                            label="Model type",
+                            multiselect=False,
+                            value=ModelType.FT.to_str(" : "),
+                            interactive=True,
+                        )
+
+                    with gr.Column():
+                        precision = gr.Dropdown(
+                            choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                            label="Precision",
+                            multiselect=False,
+                            value="float16",
+                            interactive=True,
+                        )
+                        weight_type = gr.Dropdown(
+                            choices=[i.value.name for i in WeightType],
+                            label="Weights type",
+                            multiselect=False,
+                            value="Original",
+                            interactive=True,
+                        )
+                        base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+            with gr.Column():
+                with gr.Accordion(
+                    f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                    open=False,
+                ):
+                    with gr.Row():
+                        finished_eval_table = gr.components.Dataframe(
+                            value=finished_eval_queue_df,
+                            headers=EVAL_COLS,
+                            datatype=EVAL_TYPES,
+                            row_count=5,
+                        )
+                with gr.Accordion(
+                    f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                    open=False,
+                ):
+                    with gr.Row():
+                        running_eval_table = gr.components.Dataframe(
+                            value=running_eval_queue_df,
+                            headers=EVAL_COLS,
+                            datatype=EVAL_TYPES,
+                            row_count=5,
+                        )
+
+                with gr.Accordion(
+                    f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                    open=False,
+                ):
+                    with gr.Row():
+                        pending_eval_table = gr.components.Dataframe(
+                            value=pending_eval_queue_df,
+                            headers=EVAL_COLS,
+                            datatype=EVAL_TYPES,
+                            row_count=5,
+                        )
+
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    private,
+                    weight_type,
+                    model_type,
+                ],
+                submission_result,
+            )
+
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
+                value=CITATION_BUTTON_TEXT,
+                label=CITATION_BUTTON_LABEL,
+                lines=20,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )
+
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", hours=3)  # restarted every 3h
+scheduler.add_job(update_dynamic_files, "interval", hours=2)  # launched every 2 hours
+scheduler.start()
+
+demo.queue(default_concurrency_limit=40).launch()
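
The heart of the table logic above is `filter_queries()`/`search_table()`: the search box accepts several `;`-separated terms, each term selects rows by case-insensitive substring match, and the hits are concatenated and de-duplicated. A minimal standalone sketch of that behavior, with a plain `fullname` column standing in for the app's `AutoEvalColumn.dummy` accessor:

```python
import pandas as pd

def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    # Case-insensitive substring match, as in the app's search_table()
    return df[df["fullname"].str.contains(query, case=False)]

def filter_queries(query: str, df: pd.DataFrame) -> pd.DataFrame:
    # Each ";"-separated term selects rows independently; the results are
    # concatenated and de-duplicated, mirroring the app's filter_queries()
    parts = [q.strip() for q in query.split(";") if q.strip()]
    if not parts:
        return df
    hits = [h for h in (search_table(df, p) for p in parts) if len(h) > 0]
    return pd.concat(hits).drop_duplicates(subset=["fullname"]) if hits else df

df = pd.DataFrame({"fullname": ["org/llama-7b", "org/mistral-7b", "org/falcon-40b"]})
print(filter_queries("llama; falcon", df))  # keeps the llama and falcon rows
```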
backend/Dockerfile.dev
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
FROM python:3.12-slim
|
2 |
-
|
3 |
-
WORKDIR /app
|
4 |
-
|
5 |
-
# Install required system dependencies
|
6 |
-
RUN apt-get update && apt-get install -y \
|
7 |
-
build-essential \
|
8 |
-
&& rm -rf /var/lib/apt/lists/*
|
9 |
-
|
10 |
-
# Install poetry
|
11 |
-
RUN pip install poetry
|
12 |
-
|
13 |
-
# Copy Poetry configuration files
|
14 |
-
COPY pyproject.toml poetry.lock* ./
|
15 |
-
|
16 |
-
# Install dependencies
|
17 |
-
RUN poetry config virtualenvs.create false && \
|
18 |
-
poetry install --no-interaction --no-ansi --no-root
|
19 |
-
|
20 |
-
# Environment variables configuration for logs
|
21 |
-
ENV PYTHONUNBUFFERED=1
|
22 |
-
ENV LOG_LEVEL=INFO
|
23 |
-
|
24 |
-
# In dev, mount volume directly
|
25 |
-
CMD ["uvicorn", "app.asgi:app", "--host", "0.0.0.0", "--port", "7860", "--reload", "--log-level", "warning", "--no-access-log"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/README.md
DELETED
@@ -1,352 +0,0 @@
|
|
1 |
-
# Backend - Open LLM Leaderboard 🏆
|
2 |
-
|
3 |
-
FastAPI backend for the Open LLM Leaderboard. This service is part of a larger architecture that includes a React frontend. For complete project installation, see the [main README](../README.md).
|
4 |
-
|
5 |
-
## ✨ Features
|
6 |
-
|
7 |
-
- 📊 REST API for LLM models leaderboard management
|
8 |
-
- 🗳️ Voting and ranking system
|
9 |
-
- 🔄 HuggingFace Hub integration
|
10 |
-
- 🚀 Caching and performance optimizations
|
11 |
-
|
12 |
-
## 🏗 Architecture
|
13 |
-
|
14 |
-
```mermaid
|
15 |
-
flowchart TD
|
16 |
-
Client(["**Frontend**<br><br>React Application"]) --> API["**API Server**<br><br>FastAPI REST Endpoints"]
|
17 |
-
|
18 |
-
subgraph Backend
|
19 |
-
API --> Core["**Core Layer**<br><br>• Middleware<br>• Cache<br>• Rate Limiting"]
|
20 |
-
Core --> Services["**Services Layer**<br><br>• Business Logic<br>• Data Processing"]
|
21 |
-
|
22 |
-
subgraph Services Layer
|
23 |
-
Services --> Models["**Model Service**<br><br>• Model Submission<br>• Evaluation Pipeline"]
|
24 |
-
Services --> Votes["**Vote Service**<br><br>• Vote Management<br>• Data Synchronization"]
|
25 |
-
Services --> Board["**Leaderboard Service**<br><br>• Rankings<br>• Performance Metrics"]
|
26 |
-
end
|
27 |
-
|
28 |
-
Models --> Cache["**Cache Layer**<br><br>• In-Memory Store<br>• Auto Invalidation"]
|
29 |
-
Votes --> Cache
|
30 |
-
Board --> Cache
|
31 |
-
|
32 |
-
Models --> HF["**HuggingFace Hub**<br><br>• Models Repository<br>• Datasets Access"]
|
33 |
-
Votes --> HF
|
34 |
-
Board --> HF
|
35 |
-
end
|
36 |
-
|
37 |
-
style Client fill:#f9f,stroke:#333,stroke-width:2px
|
38 |
-
style Models fill:#bbf,stroke:#333,stroke-width:2px
|
39 |
-
style Votes fill:#bbf,stroke:#333,stroke-width:2px
|
40 |
-
style Board fill:#bbf,stroke:#333,stroke-width:2px
|
41 |
-
style HF fill:#bfb,stroke:#333,stroke-width:2px
|
42 |
-
```
|
43 |
-
|
44 |
-
## 🛠️ HuggingFace Datasets
|
45 |
-
|
46 |
-
The application uses several datasets on the HuggingFace Hub:
|
47 |
-
|
48 |
-
### 1. Requests Dataset (`{HF_ORGANIZATION}/requests`)
|
49 |
-
|
50 |
-
- **Operations**:
|
51 |
-
- 📤 `POST /api/models/submit`: Adds a JSON file for each new model submission
|
52 |
-
- 📥 `GET /api/models/status`: Reads files to get models status
|
53 |
-
- **Format**: One JSON file per model with submission details
|
54 |
-
- **Updates**: On each new model submission
|
55 |
-
|
56 |
-
### 2. Votes Dataset (`{HF_ORGANIZATION}/votes`)
|
57 |
-
|
58 |
-
- **Operations**:
|
59 |
-
- 📤 `POST /api/votes/{model_id}`: Adds a new vote
|
60 |
-
- 📥 `GET /api/votes/model/{provider}/{model}`: Reads model votes
|
61 |
-
- 📥 `GET /api/votes/user/{user_id}`: Reads user votes
|
62 |
-
- **Format**: JSONL with one vote per line
|
63 |
-
- **Sync**: Bidirectional between local cache and Hub
|
64 |
-
|
65 |
-
### 3. Contents Dataset (`{HF_ORGANIZATION}/contents`)
|
66 |
-
|
67 |
-
- **Operations**:
|
68 |
-
- 📥 `GET /api/leaderboard`: Reads raw data
|
69 |
-
- 📥 `GET /api/leaderboard/formatted`: Reads and formats data
|
70 |
-
- **Format**: Main dataset containing all scores and metrics
|
71 |
-
- **Updates**: Automatic after model evaluations
|
72 |
-
|
73 |
-
### 4. Official Providers Dataset (`{HF_ORGANIZATION}/official-providers`)
|
74 |
-
|
75 |
-
- **Operations**:
|
76 |
-
- 📥 Read-only access for highlighted models
|
77 |
-
- **Format**: List of models selected by maintainers
|
78 |
-
- **Updates**: Manual by maintainers
|
79 |
-
|
80 |
-
## 🛠 Local Development
|
81 |
-
|
82 |
-
### Prerequisites
|
83 |
-
|
84 |
-
- Python 3.9+
|
85 |
-
- [Poetry](https://python-poetry.org/docs/#installation)
|
86 |
-
|
87 |
-
### Standalone Installation (without Docker)
|
88 |
-
|
89 |
-
```bash
|
90 |
-
# Install dependencies
|
91 |
-
poetry install
|
92 |
-
|
93 |
-
# Setup configuration
|
94 |
-
cp .env.example .env
|
95 |
-
|
96 |
-
# Start development server
|
97 |
-
poetry run uvicorn app.asgi:app --host 0.0.0.0 --port 7860 --reload
|
98 |
-
```
|
99 |
-
|
100 |
-
Server will be available at http://localhost:7860
|
101 |
-
|
102 |
-
## ⚙️ Configuration
|
103 |
-
|
104 |
-
| Variable | Description | Default |
|
105 |
-
| ------------ | ------------------------------------ | ----------- |
|
106 |
-
| ENVIRONMENT | Environment (development/production) | development |
|
107 |
-
| HF_TOKEN | HuggingFace authentication token | - |
|
108 |
-
| PORT | Server port | 7860 |
|
109 |
-
| LOG_LEVEL | Logging level (INFO/DEBUG/WARNING) | INFO |
|
110 |
-
| CORS_ORIGINS | Allowed CORS origins | ["*"] |
|
111 |
-
| CACHE_TTL | Cache Time To Live in seconds | 300 |
|
112 |
-
|
113 |
-
## 🔧 Middleware
|
114 |
-
|
115 |
-
The backend uses several middleware layers for optimal performance and security:
|
116 |
-
|
117 |
-
- **CORS Middleware**: Handles Cross-Origin Resource Sharing
|
118 |
-
- **GZIP Middleware**: Compresses responses > 500 bytes
|
119 |
-
- **Rate Limiting**: Prevents API abuse
|
120 |
-
- **Caching**: In-memory caching with automatic invalidation
|
121 |
-
|
122 |
-
## 📝 Logging
|
123 |
-
|
124 |
-
The application uses a structured logging system with:
|
125 |
-
|
126 |
-
- Formatted console output
|
127 |
-
- Different log levels per component
|
128 |
-
- Request/Response logging
|
129 |
-
- Performance metrics
|
130 |
-
- Error tracking
|
131 |
-
|
132 |
-
## 📁 File Structure
|
133 |
-
|
134 |
-
```
|
135 |
-
backend/
|
136 |
-
├── app/ # Source code
|
137 |
-
│ ├── api/ # Routes and endpoints
|
138 |
-
│ │ └── endpoints/ # Endpoint handlers
|
139 |
-
│ ├── core/ # Configurations
|
140 |
-
│ ├── services/ # Business logic
|
141 |
-
│ └── utils/ # Utilities
|
142 |
-
└── tests/ # Tests
|
143 |
-
```
|
144 |
-
|
145 |
-
## 📚 API
|
146 |
-
|
147 |
-
Swagger documentation available at http://localhost:7860/docs
|
148 |
-
|
149 |
-
### Main Endpoints & Data Structures
|
150 |
-
|
151 |
-
#### Leaderboard
|
152 |
-
|
153 |
-
- `GET /api/leaderboard/formatted` - Formatted data with computed fields and metadata
|
154 |
-
|
155 |
-
```typescript
|
156 |
-
Response {
|
157 |
-
models: [{
|
158 |
-
id: string, // eval_name
|
159 |
-
model: {
|
160 |
-
name: string, // fullname
|
161 |
-
sha: string, // Model sha
|
162 |
-
precision: string, // e.g. "fp16", "int8"
|
163 |
-
type: string, // e.g. "fined-tuned-on-domain-specific-dataset"
|
164 |
-
weight_type: string,
|
165 |
-
architecture: string,
|
166 |
-
average_score: number,
|
167 |
-
has_chat_template: boolean
|
168 |
-
},
|
169 |
-
evaluations: {
|
170 |
-
ifeval: {
|
171 |
-
name: "IFEval",
|
172 |
-
value: number, // Raw score
|
173 |
-
normalized_score: number
|
174 |
-
},
|
175 |
-
bbh: {
|
176 |
-
name: "BBH",
|
177 |
-
value: number,
|
178 |
-
normalized_score: number
|
179 |
-
},
|
180 |
-
math: {
|
181 |
-
name: "MATH Level 5",
|
182 |
-
value: number,
|
183 |
-
normalized_score: number
|
184 |
-
},
|
185 |
-
gpqa: {
|
186 |
-
name: "GPQA",
|
187 |
-
value: number,
|
188 |
-
normalized_score: number
|
189 |
-
},
|
190 |
-
musr: {
|
191 |
-
name: "MUSR",
|
192 |
-
value: number,
|
193 |
-
normalized_score: number
|
194 |
-
},
|
195 |
-
mmlu_pro: {
|
196 |
-
name: "MMLU-PRO",
|
197 |
-
value: number,
|
198 |
-
normalized_score: number
|
199 |
-
}
|
200 |
-
},
|
201 |
-
features: {
|
202 |
-
is_not_available_on_hub: boolean,
|
203 |
-
is_merged: boolean,
|
204 |
-
is_moe: boolean,
|
205 |
-
is_flagged: boolean,
|
206 |
-
is_official_provider: boolean
|
207 |
-
},
|
208 |
-
metadata: {
|
209 |
-
upload_date: string,
|
210 |
-
submission_date: string,
|
211 |
-
generation: string,
|
212 |
-
base_model: string,
|
213 |
-
hub_license: string,
|
214 |
-
hub_hearts: number,
|
215 |
-
params_billions: number,
|
216 |
-
co2_cost: number // CO₂ cost in kg
|
217 |
-
}
|
218 |
-
}]
|
219 |
-
}
|
220 |
-
```
|
221 |
-
|
222 |
-
- `GET /api/leaderboard` - Raw data from the HuggingFace dataset
|
223 |
-
```typescript
|
224 |
-
Response {
|
225 |
-
models: [{
|
226 |
-
eval_name: string,
|
227 |
-
Precision: string,
|
228 |
-
Type: string,
|
229 |
-
"Weight type": string,
|
230 |
-
Architecture: string,
|
231 |
-
Model: string,
|
232 |
-
fullname: string,
|
233 |
-
"Model sha": string,
|
234 |
-
"Average ⬆️": number,
|
235 |
-
"Hub License": string,
|
236 |
-
"Hub ❤️": number,
|
237 |
-
"#Params (B)": number,
|
238 |
-
"Available on the hub": boolean,
|
239 |
-
Merged: boolean,
|
240 |
-
MoE: boolean,
|
241 |
-
Flagged: boolean,
|
242 |
-
"Chat Template": boolean,
|
243 |
-
"CO₂ cost (kg)": number,
|
244 |
-
"IFEval Raw": number,
|
245 |
-
IFEval: number,
|
246 |
-
"BBH Raw": number,
|
247 |
-
BBH: number,
|
248 |
-
"MATH Lvl 5 Raw": number,
|
249 |
-
"MATH Lvl 5": number,
|
250 |
-
"GPQA Raw": number,
|
251 |
-
GPQA: number,
|
252 |
-
"MUSR Raw": number,
|
253 |
-
MUSR: number,
|
254 |
-
"MMLU-PRO Raw": number,
|
255 |
-
"MMLU-PRO": number,
|
256 |
-
"Maintainer's Highlight": boolean,
|
257 |
-
"Upload To Hub Date": string,
|
258 |
-
"Submission Date": string,
|
259 |
-
Generation: string,
|
260 |
-
"Base Model": string
|
261 |
-
}]
|
262 |
-
}
|
263 |
-
```
|
264 |
-
|
265 |
-
#### Models
|
266 |
-
|
267 |
-
- `GET /api/models/status` - Get all models grouped by status
|
268 |
-
```typescript
|
269 |
-
Response {
|
270 |
-
pending: [{
|
271 |
-
name: string,
|
272 |
-
submitter: string,
|
273 |
-
revision: string,
|
274 |
-
wait_time: string,
|
275 |
-
submission_time: string,
|
276 |
-
status: "PENDING" | "EVALUATING" | "FINISHED",
|
277 |
-
precision: string
|
278 |
-
}],
|
279 |
-
evaluating: Array<Model>,
|
280 |
-
finished: Array<Model>
|
281 |
-
}
|
282 |
-
```
|
283 |
-
- `GET /api/models/pending` - Get pending models only
|
284 |
-
- `POST /api/models/submit` - Submit model
|
285 |
-
|
286 |
-
```typescript
|
287 |
-
Request {
|
288 |
-
user_id: string,
|
289 |
-
model_id: string,
|
290 |
-
base_model?: string,
|
291 |
-
precision?: string,
|
292 |
-
model_type: string
|
293 |
-
}
|
294 |
-
|
295 |
-
Response {
|
296 |
-
status: string,
|
297 |
-
message: string
|
298 |
-
}
|
299 |
-
```
|
300 |
-
|
301 |
-
- `GET /api/models/{model_id}/status` - Get model status
|
302 |
-
|
303 |
-
#### Votes
|
304 |
-
|
305 |
-
- `POST /api/votes/{model_id}` - Vote
|
306 |
-
|
307 |
-
```typescript
|
308 |
-
Request {
|
309 |
-
vote_type: "up" | "down",
|
310 |
-
user_id: string // HuggingFace username
|
311 |
-
}
|
312 |
-
|
313 |
-
Response {
|
314 |
-
success: boolean,
|
315 |
-
message: string
|
316 |
-
}
|
317 |
-
```
|
318 |
-
|
319 |
-
- `GET /api/votes/model/{provider}/{model}` - Get model votes
|
320 |
-
```typescript
|
321 |
-
Response {
|
322 |
-
total_votes: number,
|
323 |
-
up_votes: number,
|
324 |
-
down_votes: number
|
325 |
-
}
|
326 |
-
```
|
327 |
-
- `GET /api/votes/user/{user_id}` - Get user votes
|
328 |
-
```typescript
|
329 |
-
Response Array<{
|
330 |
-
model_id: string,
|
331 |
-
vote_type: string,
|
332 |
-
timestamp: string
|
333 |
-
}>
|
334 |
-
```
|
335 |
-
|
336 |
-
## 🔒 Authentication
|
337 |
-
|
338 |
-
The backend uses HuggingFace token-based authentication for secure API access. Make sure to:
|
339 |
-
|
340 |
-
1. Set your HF_TOKEN in the .env file
|
341 |
-
2. Include the token in API requests via Bearer authentication
|
342 |
-
3. Keep your token secure and never commit it to version control
|
343 |
-
|
344 |
-
## 🚀 Performance
|
345 |
-
|
346 |
-
The backend implements several optimizations:
|
347 |
-
|
348 |
-
- In-memory caching with configurable TTL (Time To Live)
|
349 |
-
- Batch processing for model evaluations
|
350 |
-
- Rate limiting for API endpoints
|
351 |
-
- Efficient database queries with proper indexing
|
352 |
-
- Automatic cache invalidation for votes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
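
As a usage illustration of the vote endpoints the deleted README documents, here is a hedged sketch using the `requests` library. The base URL and model id are hypothetical, and it assumes the routers are mounted under `/api` exactly as the README's endpoint list shows; note that `votes.py` below takes `vote_type` and `user_id` as query parameters:

```python
import requests

BASE_URL = "http://localhost:7860"  # assumption: the local dev server from the README
MODEL_ID = "some-org/some-model"    # hypothetical model id

# POST /api/votes/{model_id} - vote_type and user_id are query parameters (see votes.py)
resp = requests.post(
    f"{BASE_URL}/api/votes/{MODEL_ID}",
    params={"vote_type": "up", "user_id": "some-hf-user"},
)
print(resp.json())  # -> {"success": ..., "message": ...} per the README

# GET /api/votes/model/{provider}/{model}
votes = requests.get(f"{BASE_URL}/api/votes/model/{MODEL_ID}").json()
print(votes)  # -> {"total_votes": ..., "up_votes": ..., "down_votes": ...}
```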
backend/__init__.py
DELETED
File without changes
backend/app/api/__init__.py
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
API package initialization
|
3 |
-
"""
|
4 |
-
|
5 |
-
__all__ = ["endpoints"]
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/dependencies.py
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
from fastapi import Depends, HTTPException
|
2 |
-
import logging
|
3 |
-
from app.services.models import ModelService
|
4 |
-
from app.services.votes import VoteService
|
5 |
-
from app.core.formatting import LogFormatter
|
6 |
-
|
7 |
-
logger = logging.getLogger(__name__)
|
8 |
-
|
9 |
-
model_service = ModelService()
|
10 |
-
vote_service = VoteService()
|
11 |
-
|
12 |
-
async def get_model_service() -> ModelService:
|
13 |
-
"""Dependency to get ModelService instance"""
|
14 |
-
try:
|
15 |
-
logger.info(LogFormatter.info("Initializing model service dependency"))
|
16 |
-
await model_service.initialize()
|
17 |
-
logger.info(LogFormatter.success("Model service initialized"))
|
18 |
-
return model_service
|
19 |
-
except Exception as e:
|
20 |
-
error_msg = "Failed to initialize model service"
|
21 |
-
logger.error(LogFormatter.error(error_msg, e))
|
22 |
-
raise HTTPException(status_code=500, detail=str(e))
|
23 |
-
|
24 |
-
async def get_vote_service() -> VoteService:
|
25 |
-
"""Dependency to get VoteService instance"""
|
26 |
-
try:
|
27 |
-
logger.info(LogFormatter.info("Initializing vote service dependency"))
|
28 |
-
await vote_service.initialize()
|
29 |
-
logger.info(LogFormatter.success("Vote service initialized"))
|
30 |
-
return vote_service
|
31 |
-
except Exception as e:
|
32 |
-
error_msg = "Failed to initialize vote service"
|
33 |
-
logger.error(LogFormatter.error(error_msg, e))
|
34 |
-
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/endpoints/leaderboard.py
DELETED
@@ -1,49 +0,0 @@
|
|
1 |
-
from fastapi import APIRouter
|
2 |
-
from typing import List, Dict, Any
|
3 |
-
from app.services.leaderboard import LeaderboardService
|
4 |
-
from app.core.fastapi_cache import cached, build_cache_key
|
5 |
-
import logging
|
6 |
-
from app.core.formatting import LogFormatter
|
7 |
-
|
8 |
-
logger = logging.getLogger(__name__)
|
9 |
-
router = APIRouter()
|
10 |
-
leaderboard_service = LeaderboardService()
|
11 |
-
|
12 |
-
def leaderboard_key_builder(func, namespace: str = "leaderboard", **kwargs):
|
13 |
-
"""Build cache key for leaderboard data"""
|
14 |
-
key_type = "raw" if func.__name__ == "get_leaderboard" else "formatted"
|
15 |
-
key = build_cache_key(namespace, key_type)
|
16 |
-
logger.debug(LogFormatter.info(f"Built leaderboard cache key: {key}"))
|
17 |
-
return key
|
18 |
-
|
19 |
-
@router.get("")
|
20 |
-
@cached(expire=300, key_builder=leaderboard_key_builder)
|
21 |
-
async def get_leaderboard() -> List[Dict[str, Any]]:
|
22 |
-
"""
|
23 |
-
Get raw leaderboard data
|
24 |
-
Response will be automatically GZIP compressed if size > 500 bytes
|
25 |
-
"""
|
26 |
-
try:
|
27 |
-
logger.info(LogFormatter.info("Fetching raw leaderboard data"))
|
28 |
-
data = await leaderboard_service.fetch_raw_data()
|
29 |
-
logger.info(LogFormatter.success(f"Retrieved {len(data)} leaderboard entries"))
|
30 |
-
return data
|
31 |
-
except Exception as e:
|
32 |
-
logger.error(LogFormatter.error("Failed to fetch raw leaderboard data", e))
|
33 |
-
raise
|
34 |
-
|
35 |
-
@router.get("/formatted")
|
36 |
-
@cached(expire=300, key_builder=leaderboard_key_builder)
|
37 |
-
async def get_formatted_leaderboard() -> List[Dict[str, Any]]:
|
38 |
-
"""
|
39 |
-
Get formatted leaderboard data with restructured objects
|
40 |
-
Response will be automatically GZIP compressed if size > 500 bytes
|
41 |
-
"""
|
42 |
-
try:
|
43 |
-
logger.info(LogFormatter.info("Fetching formatted leaderboard data"))
|
44 |
-
data = await leaderboard_service.get_formatted_data()
|
45 |
-
logger.info(LogFormatter.success(f"Retrieved {len(data)} formatted entries"))
|
46 |
-
return data
|
47 |
-
except Exception as e:
|
48 |
-
logger.error(LogFormatter.error("Failed to fetch formatted leaderboard data", e))
|
49 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
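
The deleted `app/core/fastapi_cache.py` is not included in this 50-file view, so the exact implementation of `cached`/`build_cache_key`/`invalidate_cache_key` is unknown; the sketch below is only a generic reconstruction of the pattern the endpoints above rely on (an async TTL cache keyed by a caller-supplied `key_builder`), not the project's actual code:

```python
import time
from functools import wraps

_store: dict = {}  # cache key -> (expiry timestamp, value)

def build_cache_key(namespace: str, *parts) -> str:
    # "namespace:part1:part2" keys, matching calls like build_cache_key("model_votes", provider, model)
    return ":".join([namespace, *map(str, parts)])

def invalidate_cache_key(key: str) -> None:
    _store.pop(key, None)

def cached(expire: int = 60, key_builder=None):
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            key = key_builder(func, **kwargs) if key_builder else build_cache_key(func.__name__)
            hit = _store.get(key)
            if hit is not None and hit[0] > time.monotonic():
                return hit[1]  # fresh cache hit
            value = await func(*args, **kwargs)
            _store[key] = (time.monotonic() + expire, value)
            return value
        return wrapper
    return decorator
```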
backend/app/api/endpoints/models.py
DELETED
@@ -1,103 +0,0 @@
|
|
1 |
-
from fastapi import APIRouter, HTTPException, Depends
|
2 |
-
from typing import Dict, Any, List
|
3 |
-
import logging
|
4 |
-
from app.services.models import ModelService
|
5 |
-
from app.api.dependencies import get_model_service
|
6 |
-
from app.core.fastapi_cache import cached
|
7 |
-
from app.core.formatting import LogFormatter
|
8 |
-
|
9 |
-
logger = logging.getLogger(__name__)
|
10 |
-
router = APIRouter(tags=["models"])
|
11 |
-
|
12 |
-
@router.get("/status")
|
13 |
-
@cached(expire=300)
|
14 |
-
async def get_models_status(
|
15 |
-
model_service: ModelService = Depends(get_model_service)
|
16 |
-
) -> Dict[str, List[Dict[str, Any]]]:
|
17 |
-
"""Get all models grouped by status"""
|
18 |
-
try:
|
19 |
-
logger.info(LogFormatter.info("Fetching status for all models"))
|
20 |
-
result = await model_service.get_models()
|
21 |
-
stats = {
|
22 |
-
status: len(models) for status, models in result.items()
|
23 |
-
}
|
24 |
-
for line in LogFormatter.stats(stats, "Models by Status"):
|
25 |
-
logger.info(line)
|
26 |
-
return result
|
27 |
-
except Exception as e:
|
28 |
-
logger.error(LogFormatter.error("Failed to get models status", e))
|
29 |
-
raise HTTPException(status_code=500, detail=str(e))
|
30 |
-
|
31 |
-
@router.get("/pending")
|
32 |
-
@cached(expire=60)
|
33 |
-
async def get_pending_models(
|
34 |
-
model_service: ModelService = Depends(get_model_service)
|
35 |
-
) -> List[Dict[str, Any]]:
|
36 |
-
"""Get all models waiting for evaluation"""
|
37 |
-
try:
|
38 |
-
logger.info(LogFormatter.info("Fetching pending models"))
|
39 |
-
models = await model_service.get_models()
|
40 |
-
pending = models.get("pending", [])
|
41 |
-
logger.info(LogFormatter.success(f"Found {len(pending)} pending models"))
|
42 |
-
return pending
|
43 |
-
except Exception as e:
|
44 |
-
logger.error(LogFormatter.error("Failed to get pending models", e))
|
45 |
-
raise HTTPException(status_code=500, detail=str(e))
|
46 |
-
|
47 |
-
@router.post("/submit")
|
48 |
-
async def submit_model(
|
49 |
-
model_data: Dict[str, Any],
|
50 |
-
model_service: ModelService = Depends(get_model_service)
|
51 |
-
) -> Dict[str, Any]:
|
52 |
-
try:
|
53 |
-
logger.info(LogFormatter.section("MODEL SUBMISSION"))
|
54 |
-
|
55 |
-
user_id = model_data.pop('user_id', None)
|
56 |
-
if not user_id:
|
57 |
-
error_msg = "user_id is required"
|
58 |
-
logger.error(LogFormatter.error("Validation failed", error_msg))
|
59 |
-
raise ValueError(error_msg)
|
60 |
-
|
61 |
-
# Log submission details
|
62 |
-
submission_info = {
|
63 |
-
"Model_ID": model_data.get("model_id"),
|
64 |
-
"User": user_id,
|
65 |
-
"Base_Model": model_data.get("base_model"),
|
66 |
-
"Precision": model_data.get("precision"),
|
67 |
-
"Model_Type": model_data.get("model_type")
|
68 |
-
}
|
69 |
-
for line in LogFormatter.tree(submission_info, "Submission Details"):
|
70 |
-
logger.info(line)
|
71 |
-
|
72 |
-
result = await model_service.submit_model(model_data, user_id)
|
73 |
-
logger.info(LogFormatter.success("Model submitted successfully"))
|
74 |
-
return result
|
75 |
-
|
76 |
-
except ValueError as e:
|
77 |
-
logger.error(LogFormatter.error("Invalid submission data", e))
|
78 |
-
raise HTTPException(status_code=400, detail=str(e))
|
79 |
-
except Exception as e:
|
80 |
-
logger.error(LogFormatter.error("Submission failed", e))
|
81 |
-
raise HTTPException(status_code=500, detail=str(e))
|
82 |
-
|
83 |
-
@router.get("/{model_id}/status")
|
84 |
-
async def get_model_status(
|
85 |
-
model_id: str,
|
86 |
-
model_service: ModelService = Depends(get_model_service)
|
87 |
-
) -> Dict[str, Any]:
|
88 |
-
try:
|
89 |
-
logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
|
90 |
-
status = await model_service.get_model_status(model_id)
|
91 |
-
|
92 |
-
if status["status"] != "not_found":
|
93 |
-
logger.info(LogFormatter.success("Status found"))
|
94 |
-
for line in LogFormatter.tree(status, "Model Status"):
|
95 |
-
logger.info(line)
|
96 |
-
else:
|
97 |
-
logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
|
98 |
-
|
99 |
-
return status
|
100 |
-
|
101 |
-
except Exception as e:
|
102 |
-
logger.error(LogFormatter.error("Failed to get model status", e))
|
103 |
-
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/app/api/endpoints/votes.py
DELETED
@@ -1,105 +0,0 @@
from fastapi import APIRouter, HTTPException, Query, Depends
from typing import Dict, Any, List
from app.services.votes import VoteService
from app.core.fastapi_cache import cached, build_cache_key, invalidate_cache_key
import logging
from app.core.formatting import LogFormatter

logger = logging.getLogger(__name__)
router = APIRouter()
vote_service = VoteService()

def model_votes_key_builder(func, namespace: str = "model_votes", **kwargs):
    """Build cache key for model votes"""
    provider = kwargs.get('provider')
    model = kwargs.get('model')
    key = build_cache_key(namespace, provider, model)
    logger.debug(LogFormatter.info(f"Built model votes cache key: {key}"))
    return key

def user_votes_key_builder(func, namespace: str = "user_votes", **kwargs):
    """Build cache key for user votes"""
    user_id = kwargs.get('user_id')
    key = build_cache_key(namespace, user_id)
    logger.debug(LogFormatter.info(f"Built user votes cache key: {key}"))
    return key

@router.post("/{model_id:path}")
async def add_vote(
    model_id: str,
    vote_type: str = Query(..., description="Type of vote (up/down)"),
    user_id: str = Query(..., description="HuggingFace username")
) -> Dict[str, Any]:
    try:
        logger.info(LogFormatter.section("ADDING VOTE"))
        stats = {
            "Model": model_id,
            "User": user_id,
            "Type": vote_type
        }
        for line in LogFormatter.tree(stats, "Vote Details"):
            logger.info(line)

        await vote_service.initialize()
        result = await vote_service.add_vote(model_id, user_id, vote_type)

        # Invalidate affected caches
        try:
            logger.info(LogFormatter.subsection("CACHE INVALIDATION"))
            provider, model = model_id.split('/', 1)

            # Build and invalidate cache keys
            model_cache_key = build_cache_key("model_votes", provider, model)
            user_cache_key = build_cache_key("user_votes", user_id)

            invalidate_cache_key(model_cache_key)
            invalidate_cache_key(user_cache_key)

            cache_stats = {
                "Model_Cache": model_cache_key,
                "User_Cache": user_cache_key
            }
            for line in LogFormatter.tree(cache_stats, "Invalidated Caches"):
                logger.info(line)

        except Exception as e:
            logger.error(LogFormatter.error("Failed to invalidate cache", e))

        return result
    except Exception as e:
        logger.error(LogFormatter.error("Failed to add vote", e))
        raise HTTPException(status_code=400, detail=str(e))

@router.get("/model/{provider}/{model}")
@cached(expire=60, key_builder=model_votes_key_builder)
async def get_model_votes(
    provider: str,
    model: str
) -> Dict[str, Any]:
    """Get all votes for a specific model"""
    try:
        logger.info(LogFormatter.info(f"Fetching votes for model: {provider}/{model}"))
        await vote_service.initialize()
        model_id = f"{provider}/{model}"
        result = await vote_service.get_model_votes(model_id)
        logger.info(LogFormatter.success(f"Found {result.get('total_votes', 0)} votes"))
        return result
    except Exception as e:
        logger.error(LogFormatter.error("Failed to get model votes", e))
        raise HTTPException(status_code=400, detail=str(e))

@router.get("/user/{user_id}")
@cached(expire=60, key_builder=user_votes_key_builder)
async def get_user_votes(
    user_id: str
) -> List[Dict[str, Any]]:
    """Get all votes from a specific user"""
    try:
        logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
        await vote_service.initialize()
        votes = await vote_service.get_user_votes(user_id)
        logger.info(LogFormatter.success(f"Found {len(votes)} votes"))
        return votes
    except Exception as e:
        logger.error(LogFormatter.error("Failed to get user votes", e))
        raise HTTPException(status_code=400, detail=str(e))

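One detail worth noticing in the deleted file above: add_vote invalidates keys built with build_cache_key("model_votes", provider, model), so the key builders must produce exactly the same strings for the invalidation to hit the cached entries. A minimal standalone sketch of that invariant, reimplementing the key format from app/core/fastapi_cache.py (deleted further down) so it runs on its own; the model id is illustrative.

# Standalone sketch of the cache-key invariant; the format string mirrors
# build_cache_key in backend/app/core/fastapi_cache.py below.
def build_cache_key(namespace: str, *args) -> str:
    return f"fastapi-cache:{namespace}:{':'.join(str(arg) for arg in args)}"

provider, model = "example-org/example-model".split("/", 1)
# The key written by the @cached endpoint and the key deleted by add_vote must match:
assert build_cache_key("model_votes", provider, model) == \
    "fastapi-cache:model_votes:example-org:example-model"
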
backend/app/api/router.py
DELETED
@@ -1,9 +0,0 @@
from fastapi import APIRouter

from app.api.endpoints import leaderboard, votes, models

router = APIRouter()

router.include_router(leaderboard.router, prefix="/leaderboard", tags=["leaderboard"])
router.include_router(votes.router, prefix="/votes", tags=["votes"])
router.include_router(models.router, prefix="/models", tags=["models"])

backend/app/asgi.py
DELETED
@@ -1,106 +0,0 @@
"""
ASGI entry point for the Open LLM Leaderboard API.
"""
import os
import uvicorn
import logging
import logging.config
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
import sys

from app.api.router import router
from app.core.fastapi_cache import setup_cache
from app.core.formatting import LogFormatter
from app.config import hf_config

# Configure logging before anything else
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": True,
    "formatters": {
        "default": {
            "format": "%(name)s - %(levelname)s - %(message)s",
        }
    },
    "handlers": {
        "default": {
            "formatter": "default",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stdout",
        }
    },
    "loggers": {
        "uvicorn": {
            "handlers": ["default"],
            "level": "WARNING",
            "propagate": False,
        },
        "uvicorn.error": {
            "level": "WARNING",
            "handlers": ["default"],
            "propagate": False,
        },
        "uvicorn.access": {
            "handlers": ["default"],
            "level": "WARNING",
            "propagate": False,
        },
        "app": {
            "handlers": ["default"],
            "level": "WARNING",
            "propagate": False,
        }
    },
    "root": {
        "handlers": ["default"],
        "level": "WARNING",
    }
}

# Apply logging configuration
logging.config.dictConfig(LOGGING_CONFIG)
logger = logging.getLogger("app")

# Create FastAPI application
app = FastAPI(
    title="Open LLM Leaderboard",
    version="1.0.0",
    docs_url="/docs",
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add GZIP compression
app.add_middleware(GZipMiddleware, minimum_size=500)

# Include API router
app.include_router(router, prefix="/api")

@app.on_event("startup")
async def startup_event():
    """Initialize services on startup"""
    logger.info("\n")
    logger.info(LogFormatter.section("APPLICATION STARTUP"))

    # Log HF configuration
    logger.info(LogFormatter.section("HUGGING FACE CONFIGURATION"))
    logger.info(LogFormatter.info(f"Organization: {hf_config.HF_ORGANIZATION}"))
    logger.info(LogFormatter.info(f"Token Status: {'Present' if hf_config.HF_TOKEN else 'Missing'}"))
    logger.info(LogFormatter.info(f"Using repositories:"))
    logger.info(LogFormatter.info(f"  - Queue: {hf_config.QUEUE_REPO}"))
    logger.info(LogFormatter.info(f"  - Aggregated: {hf_config.AGGREGATED_REPO}"))
    logger.info(LogFormatter.info(f"  - Votes: {hf_config.VOTES_REPO}"))
    logger.info(LogFormatter.info(f"  - Official Providers: {hf_config.OFFICIAL_PROVIDERS_REPO}"))

    # Setup cache
    setup_cache()
    logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))

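For context, a minimal way to serve the deleted ASGI module above; a sketch assuming the backend package is on PYTHONPATH. Port 7860 matches PORT in app/config/base.py below.

# Hedged sketch: run the FastAPI app defined in app/asgi.py with uvicorn.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app.asgi:app", host="0.0.0.0", port=7860)
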
backend/app/config/__init__.py
DELETED
@@ -1,6 +0,0 @@
"""
Configuration module for the Open LLM Leaderboard backend.
All configuration values are imported from base.py to avoid circular dependencies.
"""

from .base import *

backend/app/config/base.py
DELETED
@@ -1,38 +0,0 @@
import os
from pathlib import Path

# Server configuration
HOST = "0.0.0.0"
PORT = 7860
WORKERS = 4
RELOAD = True if os.environ.get("ENVIRONMENT") == "development" else False

# CORS configuration
ORIGINS = ["http://localhost:3000"] if os.getenv("ENVIRONMENT") == "development" else ["*"]

# Cache configuration
CACHE_TTL = int(os.environ.get("CACHE_TTL", 300))  # 5 minutes default

# Rate limiting
RATE_LIMIT_PERIOD = 7  # days
RATE_LIMIT_QUOTA = 5
HAS_HIGHER_RATE_LIMIT = []

# HuggingFace configuration
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_ORGANIZATION = "open-llm-leaderboard"
API = {
    "INFERENCE": "https://api-inference.huggingface.co/models",
    "HUB": "https://huggingface.co"
}

# Cache paths
CACHE_ROOT = Path(os.environ.get("HF_HOME", ".cache"))
DATASETS_CACHE = CACHE_ROOT / "datasets"
MODELS_CACHE = CACHE_ROOT / "models"
VOTES_CACHE = CACHE_ROOT / "votes"
EVAL_CACHE = CACHE_ROOT / "eval-queue"

# Repository configuration
QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
EVAL_REQUESTS_PATH = EVAL_CACHE / "eval_requests.jsonl"

backend/app/config/hf_config.py
DELETED
@@ -1,30 +0,0 @@
import os
import logging
from typing import Optional
from huggingface_hub import HfApi
from pathlib import Path
from app.core.cache import cache_config

logger = logging.getLogger(__name__)

# Organization or user who owns the datasets
HF_ORGANIZATION = "open-llm-leaderboard"

# Get HF token directly from environment
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    logger.warning("HF_TOKEN not found in environment variables. Some features may be limited.")

# Initialize HF API
API = HfApi(token=HF_TOKEN)

# Repository configuration
QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
AGGREGATED_REPO = f"{HF_ORGANIZATION}/contents"
VOTES_REPO = f"{HF_ORGANIZATION}/votes"
OFFICIAL_PROVIDERS_REPO = f"{HF_ORGANIZATION}/official-providers"

# File paths from cache config
VOTES_PATH = cache_config.votes_file
EVAL_REQUESTS_PATH = cache_config.eval_requests_file
MODEL_CACHE_DIR = cache_config.models_cache

backend/app/config/logging_config.py
DELETED
@@ -1,38 +0,0 @@
import logging
import sys
from tqdm import tqdm

def get_tqdm_handler():
    """
    Creates a special handler for tqdm that doesn't interfere with other logs.
    """
    class TqdmLoggingHandler(logging.Handler):
        def emit(self, record):
            try:
                msg = self.format(record)
                tqdm.write(msg)
                self.flush()
            except Exception:
                self.handleError(record)

    return TqdmLoggingHandler()

def setup_service_logger(service_name: str) -> logging.Logger:
    """
    Configure a specific logger for a given service.
    """
    logger = logging.getLogger(f"app.services.{service_name}")

    # If the logger already has handlers, don't reconfigure it
    if logger.handlers:
        return logger

    # Add tqdm handler for this service
    tqdm_handler = get_tqdm_handler()
    tqdm_handler.setFormatter(logging.Formatter('%(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(tqdm_handler)

    # Don't propagate logs to parent loggers
    logger.propagate = False

    return logger

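A hedged usage sketch for setup_service_logger above, assuming the module is importable; the service name "votes" is illustrative. Routing records through tqdm.write keeps log lines from corrupting an active progress bar.

# Sketch: obtain a per-service logger whose output goes through tqdm.write.
from app.config.logging_config import setup_service_logger

svc_logger = setup_service_logger("votes")
svc_logger.warning("logged via tqdm.write, so an active progress bar is preserved")
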
backend/app/core/cache.py
DELETED
@@ -1,109 +0,0 @@
import os
import shutil
from pathlib import Path
from datetime import timedelta
import logging
from app.core.formatting import LogFormatter
from app.config.base import (
    CACHE_ROOT,
    DATASETS_CACHE,
    MODELS_CACHE,
    VOTES_CACHE,
    EVAL_CACHE,
    CACHE_TTL
)

logger = logging.getLogger(__name__)

class CacheConfig:
    def __init__(self):
        # Get cache paths from config
        self.cache_root = CACHE_ROOT
        self.datasets_cache = DATASETS_CACHE
        self.models_cache = MODELS_CACHE
        self.votes_cache = VOTES_CACHE
        self.eval_cache = EVAL_CACHE

        # Specific files
        self.votes_file = self.votes_cache / "votes_data.jsonl"
        self.eval_requests_file = self.eval_cache / "eval_requests.jsonl"

        # Cache TTL
        self.cache_ttl = timedelta(seconds=CACHE_TTL)

        self._initialize_cache_dirs()
        self._setup_environment()

    def _initialize_cache_dirs(self):
        """Initialize all necessary cache directories"""
        try:
            logger.info(LogFormatter.section("CACHE INITIALIZATION"))

            cache_dirs = {
                "Root": self.cache_root,
                "Datasets": self.datasets_cache,
                "Models": self.models_cache,
                "Votes": self.votes_cache,
                "Eval": self.eval_cache
            }

            for name, cache_dir in cache_dirs.items():
                cache_dir.mkdir(parents=True, exist_ok=True)
                logger.info(LogFormatter.success(f"{name} cache directory: {cache_dir}"))

        except Exception as e:
            logger.error(LogFormatter.error("Failed to create cache directories", e))
            raise

    def _setup_environment(self):
        """Configure HuggingFace environment variables"""
        logger.info(LogFormatter.subsection("ENVIRONMENT SETUP"))

        env_vars = {
            "HF_HOME": str(self.cache_root),
            "HF_DATASETS_CACHE": str(self.datasets_cache)
        }

        for var, value in env_vars.items():
            os.environ[var] = value
            logger.info(LogFormatter.info(f"Set {var}={value}"))


    def get_cache_path(self, cache_type: str) -> Path:
        """Returns the path for a specific cache type"""
        cache_paths = {
            "datasets": self.datasets_cache,
            "models": self.models_cache,
            "votes": self.votes_cache,
            "eval": self.eval_cache
        }
        return cache_paths.get(cache_type, self.cache_root)

    def flush_cache(self, cache_type: str = None):
        """Flush specified cache or all caches if no type is specified"""
        try:
            if cache_type:
                logger.info(LogFormatter.section(f"FLUSHING {cache_type.upper()} CACHE"))
                cache_dir = self.get_cache_path(cache_type)
                if cache_dir.exists():
                    stats = {
                        "Cache_Type": cache_type,
                        "Directory": str(cache_dir)
                    }
                    for line in LogFormatter.tree(stats, "Cache Details"):
                        logger.info(line)
                    shutil.rmtree(cache_dir)
                    cache_dir.mkdir(parents=True, exist_ok=True)
                    logger.info(LogFormatter.success("Cache cleared successfully"))
            else:
                logger.info(LogFormatter.section("FLUSHING ALL CACHES"))
                for cache_type in ["datasets", "models", "votes", "eval"]:
                    self.flush_cache(cache_type)
                logger.info(LogFormatter.success("All caches cleared successfully"))

        except Exception as e:
            logger.error(LogFormatter.error("Failed to flush cache", e))
            raise

# Singleton instance of cache configuration
cache_config = CacheConfig()

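A short usage sketch for the cache_config singleton above, assuming the module is importable; the printed path reflects the HF_HOME default of .cache from app/config/base.py.

# Sketch: resolve a cache path and flush one cache type via the singleton above.
from app.core.cache import cache_config

print(cache_config.get_cache_path("datasets"))  # e.g. .cache/datasets
cache_config.flush_cache("votes")  # removes and recreates the votes cache dir
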
backend/app/core/fastapi_cache.py
DELETED
@@ -1,48 +0,0 @@
from fastapi_cache import FastAPICache
from fastapi_cache.backends.inmemory import InMemoryBackend
from fastapi_cache.decorator import cache
from datetime import timedelta
from app.config import CACHE_TTL
import logging
from app.core.formatting import LogFormatter

logger = logging.getLogger(__name__)

def setup_cache():
    """Initialize FastAPI Cache with in-memory backend"""
    FastAPICache.init(
        backend=InMemoryBackend(),
        prefix="fastapi-cache",
        expire=CACHE_TTL
    )
    logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))

def invalidate_cache_key(key: str):
    """Invalidate a specific cache key"""
    try:
        backend = FastAPICache.get_backend()
        if hasattr(backend, 'delete'):
            backend.delete(key)
            logger.info(LogFormatter.success(f"Cache invalidated for key: {key}"))
        else:
            logger.warning(LogFormatter.warning("Cache backend does not support deletion"))
    except Exception as e:
        logger.error(LogFormatter.error(f"Failed to invalidate cache key: {key}", e))

def build_cache_key(namespace: str, *args) -> str:
    """Build a consistent cache key"""
    key = f"fastapi-cache:{namespace}:{':'.join(str(arg) for arg in args)}"
    logger.debug(LogFormatter.info(f"Built cache key: {key}"))
    return key

def cached(expire: int = CACHE_TTL, key_builder=None):
    """Decorator for caching endpoint responses

    Args:
        expire (int): Cache TTL in seconds
        key_builder (callable, optional): Custom key builder function
    """
    return cache(
        expire=expire,
        key_builder=key_builder
    )

backend/app/core/formatting.py
DELETED
@@ -1,104 +0,0 @@
import logging
from typing import Dict, Any, List, Optional

logger = logging.getLogger(__name__)

class LogFormatter:
    """Utility class for consistent log formatting across the application"""

    @staticmethod
    def section(title: str) -> str:
        """Create a section header"""
        return f"\n{'='*20} {title.upper()} {'='*20}"

    @staticmethod
    def subsection(title: str) -> str:
        """Create a subsection header"""
        return f"\n{'─'*20} {title} {'─'*20}"

    @staticmethod
    def tree(items: Dict[str, Any], title: str = None) -> List[str]:
        """Create a tree view of dictionary data"""
        lines = []
        if title:
            lines.append(f"📊 {title}:")

        # Get the maximum length for alignment
        max_key_length = max(len(str(k)) for k in items.keys())

        # Format each item
        for i, (key, value) in enumerate(items.items()):
            prefix = "└──" if i == len(items) - 1 else "├──"
            if isinstance(value, (int, float)):
                value = f"{value:,}"  # Add thousand separators
            lines.append(f"{prefix} {str(key):<{max_key_length}}: {value}")

        return lines

    @staticmethod
    def stats(stats: Dict[str, int], title: str = None) -> List[str]:
        """Format statistics with icons"""
        lines = []
        if title:
            lines.append(f"📊 {title}:")

        # Get the maximum length for alignment
        max_key_length = max(len(str(k)) for k in stats.keys())

        # Format each stat with an appropriate icon
        icons = {
            "total": "📌",
            "success": "✅",
            "error": "❌",
            "pending": "⏳",
            "processing": "⚙️",
            "finished": "✨",
            "evaluating": "🔄",
            "downloads": "⬇️",
            "files": "📁",
            "cached": "💾",
            "size": "📏",
            "time": "⏱️",
            "rate": "🚀"
        }

        # Format each item
        for i, (key, value) in enumerate(stats.items()):
            prefix = "└──" if i == len(stats) - 1 else "├──"
            icon = icons.get(key.lower().split('_')[0], "•")
            if isinstance(value, (int, float)):
                value = f"{value:,}"  # Add thousand separators
            lines.append(f"{prefix} {icon} {str(key):<{max_key_length}}: {value}")

        return lines

    @staticmethod
    def progress_bar(current: int, total: int, width: int = 20) -> str:
        """Create a progress bar"""
        percentage = (current * 100) // total
        filled = "█" * (percentage * width // 100)
        empty = "░" * (width - len(filled))
        return f"{filled}{empty} {percentage:3d}%"

    @staticmethod
    def error(message: str, error: Optional[Exception] = None) -> str:
        """Format error message"""
        error_msg = f"\n❌ Error: {message}"
        if error:
            error_msg += f"\n  └── Details: {str(error)}"
        return error_msg

    @staticmethod
    def success(message: str) -> str:
        """Format success message"""
        return f"✅ {message}"

    @staticmethod
    def warning(message: str) -> str:
        """Format warning message"""
        return f"⚠️ {message}"

    @staticmethod
    def info(message: str) -> str:
        """Format info message"""
        return f"ℹ️ {message}"

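To make the log output above concrete, a small demonstration of tree and progress_bar, assuming LogFormatter is importable; the values are illustrative.

# Sketch: what the tree and progress_bar helpers above render.
from app.core.formatting import LogFormatter

for line in LogFormatter.tree({"Model": "org/model", "Votes": 1234}, "Example"):
    print(line)
# 📊 Example:
# ├── Model: org/model
# └── Votes: 1,234

print(LogFormatter.progress_bar(5, 20))  # █████░░░░░░░░░░░░░░░  25%
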
backend/app/main.py
DELETED
@@ -1,18 +0,0 @@
from fastapi import FastAPI
from app.config.logging_config import setup_logging
import logging

# Initialize logging configuration
setup_logging()
logger = logging.getLogger(__name__)

app = FastAPI(title="Open LLM Leaderboard API")

@app.on_event("startup")
async def startup_event():
    logger.info("Starting up the application...")

    # Import and include routers after app initialization
    from app.api import models, votes
    app.include_router(models.router, prefix="/api", tags=["models"])
    app.include_router(votes.router, prefix="/api", tags=["votes"])

backend/app/services/__init__.py
DELETED
@@ -1,3 +0,0 @@
from . import hf_service, leaderboard, votes, models

__all__ = ["hf_service", "leaderboard", "votes", "models"]

backend/app/services/hf_service.py
DELETED
@@ -1,50 +0,0 @@
from typing import Optional
from huggingface_hub import HfApi
from app.config import HF_TOKEN, API
from app.core.cache import cache_config
from app.core.formatting import LogFormatter
import logging

logger = logging.getLogger(__name__)

class HuggingFaceService:
    def __init__(self):
        self.api = API
        self.token = HF_TOKEN
        self.cache_dir = cache_config.models_cache

    async def check_authentication(self) -> bool:
        """Check if the HF token is valid"""
        if not self.token:
            return False
        try:
            logger.info(LogFormatter.info("Checking HF token validity..."))
            self.api.get_token_permission()
            logger.info(LogFormatter.success("HF token is valid"))
            return True
        except Exception as e:
            logger.error(LogFormatter.error("HF token validation failed", e))
            return False

    async def get_user_info(self) -> Optional[dict]:
        """Get information about the authenticated user"""
        try:
            logger.info(LogFormatter.info("Fetching user information..."))
            info = self.api.get_token_permission()
            logger.info(LogFormatter.success(f"User info retrieved for: {info.get('user', 'Unknown')}"))
            return info
        except Exception as e:
            logger.error(LogFormatter.error("Failed to get user info", e))
            return None

    def _log_repo_operation(self, operation: str, repo: str, details: str = None):
        """Helper to log repository operations"""
        logger.info(LogFormatter.section(f"HF REPOSITORY OPERATION - {operation.upper()}"))
        stats = {
            "Operation": operation,
            "Repository": repo,
        }
        if details:
            stats["Details"] = details
        for line in LogFormatter.tree(stats):
            logger.info(line)

backend/app/services/leaderboard.py
DELETED
@@ -1,208 +0,0 @@
from app.core.cache import cache_config
from datetime import datetime
from typing import List, Dict, Any
import datasets
from fastapi import HTTPException
import logging
from app.config.base import HF_ORGANIZATION
from app.core.formatting import LogFormatter

logger = logging.getLogger(__name__)

class LeaderboardService:
    def __init__(self):
        pass

    async def fetch_raw_data(self) -> List[Dict[str, Any]]:
        """Fetch raw leaderboard data from HuggingFace dataset"""
        try:
            logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
            logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents"))

            dataset = datasets.load_dataset(
                f"{HF_ORGANIZATION}/contents",
                cache_dir=cache_config.get_cache_path("datasets")
            )["train"]

            df = dataset.to_pandas()
            data = df.to_dict('records')

            stats = {
                "Total_Entries": len(data),
                "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
            }
            for line in LogFormatter.stats(stats, "Dataset Statistics"):
                logger.info(line)

            return data

        except Exception as e:
            logger.error(LogFormatter.error("Failed to fetch leaderboard data", e))
            raise HTTPException(status_code=500, detail=str(e))

    async def get_formatted_data(self) -> List[Dict[str, Any]]:
        """Get formatted leaderboard data"""
        try:
            logger.info(LogFormatter.section("FORMATTING LEADERBOARD DATA"))

            raw_data = await self.fetch_raw_data()
            formatted_data = []
            type_counts = {}
            error_count = 0

            # Initialize progress tracking
            total_items = len(raw_data)
            logger.info(LogFormatter.info(f"Processing {total_items:,} entries..."))

            for i, item in enumerate(raw_data, 1):
                try:
                    formatted_item = await self.transform_data(item)
                    formatted_data.append(formatted_item)

                    # Count model types
                    model_type = formatted_item["model"]["type"]
                    type_counts[model_type] = type_counts.get(model_type, 0) + 1

                except Exception as e:
                    error_count += 1
                    logger.error(LogFormatter.error(f"Failed to format entry {i}/{total_items}", e))
                    continue

                # Log progress every 10%
                if i % max(1, total_items // 10) == 0:
                    progress = (i / total_items) * 100
                    logger.info(LogFormatter.info(f"Progress: {LogFormatter.progress_bar(i, total_items)}"))

            # Log final statistics
            stats = {
                "Total_Processed": total_items,
                "Successful": len(formatted_data),
                "Failed": error_count
            }
            logger.info(LogFormatter.section("PROCESSING SUMMARY"))
            for line in LogFormatter.stats(stats, "Processing Statistics"):
                logger.info(line)

            # Log model type distribution
            type_stats = {f"Type_{k}": v for k, v in type_counts.items()}
            logger.info(LogFormatter.subsection("MODEL TYPE DISTRIBUTION"))
            for line in LogFormatter.stats(type_stats):
                logger.info(line)

            return formatted_data

        except Exception as e:
            logger.error(LogFormatter.error("Failed to format leaderboard data", e))
            raise HTTPException(status_code=500, detail=str(e))

    async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Transform raw data into the format expected by the frontend"""
        try:
            # Extract model name for logging
            model_name = data.get("fullname", "Unknown")
            logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}"))

            # Create unique ID combining model name, precision, sha and chat template status
            unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"

            evaluations = {
                "ifeval": {
                    "name": "IFEval",
                    "value": data.get("IFEval Raw", 0),
                    "normalized_score": data.get("IFEval", 0)
                },
                "bbh": {
                    "name": "BBH",
                    "value": data.get("BBH Raw", 0),
                    "normalized_score": data.get("BBH", 0)
                },
                "math": {
                    "name": "MATH Level 5",
                    "value": data.get("MATH Lvl 5 Raw", 0),
                    "normalized_score": data.get("MATH Lvl 5", 0)
                },
                "gpqa": {
                    "name": "GPQA",
                    "value": data.get("GPQA Raw", 0),
                    "normalized_score": data.get("GPQA", 0)
                },
                "musr": {
                    "name": "MUSR",
                    "value": data.get("MUSR Raw", 0),
                    "normalized_score": data.get("MUSR", 0)
                },
                "mmlu_pro": {
                    "name": "MMLU-PRO",
                    "value": data.get("MMLU-PRO Raw", 0),
                    "normalized_score": data.get("MMLU-PRO", 0)
                }
            }

            features = {
                "is_not_available_on_hub": data.get("Available on the hub", False),
                "is_merged": data.get("Merged", False),
                "is_moe": data.get("MoE", False),
                "is_flagged": data.get("Flagged", False),
                "is_official_provider": data.get("Official Providers", False)
            }

            metadata = {
                "upload_date": data.get("Upload To Hub Date"),
                "submission_date": data.get("Submission Date"),
                "generation": data.get("Generation"),
                "base_model": data.get("Base Model"),
                "hub_license": data.get("Hub License"),
                "hub_hearts": data.get("Hub ❤️"),
                "params_billions": data.get("#Params (B)"),
                "co2_cost": data.get("CO₂ cost (kg)", 0)
            }

            # Clean model type by removing emojis if present
            original_type = data.get("Type", "")
            model_type = original_type.lower().strip()

            # Remove emojis and parentheses
            if "(" in model_type:
                model_type = model_type.split("(")[0].strip()
            model_type = ''.join(c for c in model_type if not c in '🔶🟢🟩💬🤝🌸 ')

            # Map old model types to new ones
            model_type_mapping = {
                "fine-tuned": "fined-tuned-on-domain-specific-dataset",
                "fine tuned": "fined-tuned-on-domain-specific-dataset",
                "finetuned": "fined-tuned-on-domain-specific-dataset",
                "fine_tuned": "fined-tuned-on-domain-specific-dataset",
                "ft": "fined-tuned-on-domain-specific-dataset",
                "finetuning": "fined-tuned-on-domain-specific-dataset",
                "fine tuning": "fined-tuned-on-domain-specific-dataset",
                "fine-tuning": "fined-tuned-on-domain-specific-dataset"
            }

            mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)

            if mapped_type != model_type:
                logger.debug(LogFormatter.info(f"Model type mapped: {original_type} -> {mapped_type}"))

            transformed_data = {
                "id": unique_id,
                "model": {
                    "name": data.get("fullname"),
                    "sha": data.get("Model sha"),
                    "precision": data.get("Precision"),
                    "type": mapped_type,
                    "weight_type": data.get("Weight type"),
                    "architecture": data.get("Architecture"),
                    "average_score": data.get("Average ⬆️"),
                    "has_chat_template": data.get("Chat Template", False)
                },
                "evaluations": evaluations,
                "features": features,
                "metadata": metadata
            }

            logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}"))
            return transformed_data

        except Exception as e:
            logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
            raise

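The type-normalization step in transform_data above is easy to misread, so here is a standalone sketch of just that logic, run on a representative raw label (the label itself is illustrative).

# Sketch of the emoji/parenthesis stripping plus mapping from transform_data.
raw = "🔶 fine-tuned (domain)"
model_type = raw.lower().strip()
if "(" in model_type:
    model_type = model_type.split("(")[0].strip()
model_type = ''.join(c for c in model_type if not c in '🔶🟢🟩💬🤝🌸 ')
mapping = {"fine-tuned": "fined-tuned-on-domain-specific-dataset"}
print(mapping.get(model_type, model_type))  # fined-tuned-on-domain-specific-dataset
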
backend/app/services/models.py
DELETED
@@ -1,587 +0,0 @@
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List
import json
import os
from pathlib import Path
import logging
import aiohttp
import asyncio
import time
from huggingface_hub import HfApi, CommitOperationAdd
from huggingface_hub.utils import build_hf_headers
from datasets import disable_progress_bar
import sys
import contextlib
from concurrent.futures import ThreadPoolExecutor
import tempfile

from app.config import (
    QUEUE_REPO,
    HF_TOKEN,
    EVAL_REQUESTS_PATH
)
from app.config.hf_config import HF_ORGANIZATION
from app.services.hf_service import HuggingFaceService
from app.utils.model_validation import ModelValidator
from app.services.votes import VoteService
from app.core.cache import cache_config
from app.core.formatting import LogFormatter

# Disable datasets progress bars globally
disable_progress_bar()

logger = logging.getLogger(__name__)

# Context manager to temporarily disable stdout and stderr
@contextlib.contextmanager
def suppress_output():
    stdout = sys.stdout
    stderr = sys.stderr
    devnull = open(os.devnull, 'w')
    try:
        sys.stdout = devnull
        sys.stderr = devnull
        yield
    finally:
        sys.stdout = stdout
        sys.stderr = stderr
        devnull.close()

class ProgressTracker:
    def __init__(self, total: int, desc: str = "Progress", update_frequency: int = 10):
        self.total = total
        self.current = 0
        self.desc = desc
        self.start_time = time.time()
        self.update_frequency = update_frequency  # Percentage steps
        self.last_update = -1

        # Initial log with fancy formatting
        logger.info(LogFormatter.section(desc))
        logger.info(LogFormatter.info(f"Starting processing of {total:,} items..."))
        sys.stdout.flush()

    def update(self, n: int = 1):
        self.current += n
        current_percentage = (self.current * 100) // self.total

        # Only update on frequency steps (e.g., 0%, 10%, 20%, etc.)
        if current_percentage >= self.last_update + self.update_frequency or current_percentage == 100:
            elapsed = time.time() - self.start_time
            rate = self.current / elapsed if elapsed > 0 else 0
            remaining = (self.total - self.current) / rate if rate > 0 else 0

            # Create progress stats
            stats = {
                "Progress": LogFormatter.progress_bar(self.current, self.total),
                "Items": f"{self.current:,}/{self.total:,}",
                "Time": f"⏱️ {elapsed:.1f}s elapsed, {remaining:.1f}s remaining",
                "Rate": f"🚀 {rate:.1f} items/s"
            }

            # Log progress using tree format
            for line in LogFormatter.tree(stats):
                logger.info(line)
            sys.stdout.flush()

            self.last_update = (current_percentage // self.update_frequency) * self.update_frequency

    def close(self):
        elapsed = time.time() - self.start_time
        rate = self.total / elapsed if elapsed > 0 else 0

        # Final summary with fancy formatting
        logger.info(LogFormatter.section("COMPLETED"))
        stats = {
            "Total": f"{self.total:,} items",
            "Time": f"{elapsed:.1f}s",
            "Rate": f"{rate:.1f} items/s"
        }
        for line in LogFormatter.stats(stats):
            logger.info(line)
        logger.info("="*50)
        sys.stdout.flush()

class ModelService(HuggingFaceService):
    _instance: Optional['ModelService'] = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            logger.info(LogFormatter.info("Creating new ModelService instance"))
            cls._instance = super(ModelService, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        if not hasattr(self, '_init_done'):
            logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))
            super().__init__()
            self.validator = ModelValidator()
            self.vote_service = VoteService()
            self.eval_requests_path = cache_config.eval_requests_file
            logger.info(LogFormatter.info(f"Using eval requests path: {self.eval_requests_path}"))

            self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
            self.hf_api = HfApi(token=HF_TOKEN)
            self.cached_models = None
            self.last_cache_update = 0
            self.cache_ttl = cache_config.cache_ttl.total_seconds()
            self._init_done = True
            logger.info(LogFormatter.success("Initialization complete"))

    async def _download_and_process_file(self, file: str, session: aiohttp.ClientSession, progress: ProgressTracker) -> Optional[Dict]:
        """Download and process a file asynchronously"""
        try:
            # Build file URL
            url = f"https://huggingface.co/datasets/{QUEUE_REPO}/resolve/main/{file}"
            headers = build_hf_headers(token=self.token)

            # Download file
            async with session.get(url, headers=headers) as response:
                if response.status != 200:
                    logger.error(LogFormatter.error(f"Failed to download {file}", f"HTTP {response.status}"))
                    progress.update()
                    return None

                try:
                    # First read content as text
                    text_content = await response.text()
                    # Then parse JSON
                    content = json.loads(text_content)
                except json.JSONDecodeError as e:
                    logger.error(LogFormatter.error(f"Failed to decode JSON from {file}", e))
                    progress.update()
                    return None

                # Get status and determine target status
                status = content.get("status", "PENDING").upper()
                target_status = None
                status_map = {
                    "PENDING": ["PENDING"],
                    "EVALUATING": ["RUNNING"],
                    "FINISHED": ["FINISHED"]
                }

                for target, source_statuses in status_map.items():
                    if status in source_statuses:
                        target_status = target
                        break

                if not target_status:
                    progress.update()
                    return None

                # Calculate wait time
                try:
                    submit_time = datetime.fromisoformat(content["submitted_time"].replace("Z", "+00:00"))
                    if submit_time.tzinfo is None:
                        submit_time = submit_time.replace(tzinfo=timezone.utc)
                    current_time = datetime.now(timezone.utc)
                    wait_time = current_time - submit_time

                    model_info = {
                        "name": content["model"],
                        "submitter": content.get("sender", "Unknown"),
                        "revision": content["revision"],
                        "wait_time": f"{wait_time.total_seconds():.1f}s",
                        "submission_time": content["submitted_time"],
                        "status": target_status,
                        "precision": content.get("precision", "Unknown")
                    }

                    progress.update()
                    return model_info

                except (ValueError, TypeError) as e:
                    logger.error(LogFormatter.error(f"Failed to process {file}", e))
                    progress.update()
                    return None

        except Exception as e:
            logger.error(LogFormatter.error(f"Failed to load {file}", e))
            progress.update()
            return None

    async def _refresh_models_cache(self):
        """Refresh the models cache"""
        try:
            logger.info(LogFormatter.section("CACHE REFRESH"))
            self._log_repo_operation("read", f"{HF_ORGANIZATION}/requests", "Refreshing models cache")

            # Initialize models dictionary
            models = {
                "finished": [],
                "evaluating": [],
                "pending": []
            }

            try:
                logger.info(LogFormatter.subsection("DATASET LOADING"))
                logger.info(LogFormatter.info("Loading dataset files..."))

                # List files in repository
                with suppress_output():
                    files = self.hf_api.list_repo_files(
                        repo_id=QUEUE_REPO,
                        repo_type="dataset",
                        token=self.token
                    )

                # Filter JSON files
                json_files = [f for f in files if f.endswith('.json')]
                total_files = len(json_files)

                # Log repository stats
                stats = {
                    "Total_Files": len(files),
                    "JSON_Files": total_files,
                }
                for line in LogFormatter.stats(stats, "Repository Statistics"):
                    logger.info(line)

                if not json_files:
                    raise Exception("No JSON files found in repository")

                # Initialize progress tracker
                progress = ProgressTracker(total_files, "PROCESSING FILES")

                try:
                    # Create aiohttp session to reuse connections
                    async with aiohttp.ClientSession() as session:
                        # Process files in chunks
                        chunk_size = 50

                        for i in range(0, len(json_files), chunk_size):
                            chunk = json_files[i:i + chunk_size]
                            chunk_tasks = [
                                self._download_and_process_file(file, session, progress)
                                for file in chunk
                            ]
                            results = await asyncio.gather(*chunk_tasks)

                            # Process results
                            for result in results:
                                if result:
                                    status = result.pop("status")
                                    models[status.lower()].append(result)

                finally:
                    progress.close()

                # Final summary with fancy formatting
                logger.info(LogFormatter.section("CACHE SUMMARY"))
                stats = {
                    "Finished": len(models["finished"]),
                    "Evaluating": len(models["evaluating"]),
                    "Pending": len(models["pending"])
                }
                for line in LogFormatter.stats(stats, "Models by Status"):
                    logger.info(line)
                logger.info("="*50)

            except Exception as e:
                logger.error(LogFormatter.error("Error processing files", e))
                raise

            # Update cache
            self.cached_models = models
            self.last_cache_update = time.time()
            logger.info(LogFormatter.success("Cache updated successfully"))

            return models

        except Exception as e:
            logger.error(LogFormatter.error("Cache refresh failed", e))
            raise

    async def initialize(self):
        """Initialize the model service"""
        if self._initialized:
            logger.info(LogFormatter.info("Service already initialized, using cached data"))
            return

        try:
            logger.info(LogFormatter.section("MODEL SERVICE INITIALIZATION"))

            # Check if cache already exists
            cache_path = cache_config.get_cache_path("datasets")
            if not cache_path.exists() or not any(cache_path.iterdir()):
                logger.info(LogFormatter.info("No existing cache found, initializing datasets cache..."))
                cache_config.flush_cache("datasets")
            else:
                logger.info(LogFormatter.info("Using existing datasets cache"))

            # Ensure eval requests directory exists
            self.eval_requests_path.parent.mkdir(parents=True, exist_ok=True)
            logger.info(LogFormatter.info(f"Eval requests directory: {self.eval_requests_path}"))

            # List existing files
            if self.eval_requests_path.exists():
                files = list(self.eval_requests_path.glob("**/*.json"))
                stats = {
                    "Total_Files": len(files),
                    "Directory": str(self.eval_requests_path)
                }
                for line in LogFormatter.stats(stats, "Eval Requests"):
                    logger.info(line)

            # Load initial cache
            await self._refresh_models_cache()

            self._initialized = True
            logger.info(LogFormatter.success("Model service initialization complete"))

        except Exception as e:
            logger.error(LogFormatter.error("Initialization failed", e))
            raise

    async def get_models(self) -> Dict[str, List[Dict[str, Any]]]:
        """Get all models with their status"""
        if not self._initialized:
            logger.info(LogFormatter.info("Service not initialized, initializing now..."))
            await self.initialize()

        current_time = time.time()
        cache_age = current_time - self.last_cache_update

        # Check if cache needs refresh
        if not self.cached_models:
            logger.info(LogFormatter.info("No cached data available, refreshing cache..."))
            return await self._refresh_models_cache()
        elif cache_age > self.cache_ttl:
            logger.info(LogFormatter.info(f"Cache expired ({cache_age:.1f}s old, TTL: {self.cache_ttl}s)"))
            return await self._refresh_models_cache()
        else:
            logger.info(LogFormatter.info(f"Using cached data ({cache_age:.1f}s old)"))
            return self.cached_models

    async def submit_model(
        self,
        model_data: Dict[str, Any],
        user_id: str
    ) -> Dict[str, Any]:
        logger.info(LogFormatter.section("MODEL SUBMISSION"))
        self._log_repo_operation("write", f"{HF_ORGANIZATION}/requests", f"Submitting model {model_data['model_id']} by {user_id}")
        stats = {
            "Model": model_data["model_id"],
            "User": user_id,
            "Revision": model_data["revision"],
            "Precision": model_data["precision"],
            "Type": model_data["model_type"]
        }
        for line in LogFormatter.tree(stats, "Submission Details"):
            logger.info(line)

        # Validate required fields
        required_fields = [
            "model_id", "base_model", "revision", "precision",
            "weight_type", "model_type", "use_chat_template"
        ]
        for field in required_fields:
            if field not in model_data:
                raise ValueError(f"Missing required field: {field}")

        # Get model info and validate it exists on HuggingFace
        try:
            logger.info(LogFormatter.subsection("MODEL VALIDATION"))

            # Get the model info to check if it exists
            model_info = self.hf_api.model_info(
                model_data["model_id"],
                revision=model_data["revision"],
                token=self.token
            )

            if not model_info:
                raise Exception(f"Model {model_data['model_id']} not found on HuggingFace Hub")

            logger.info(LogFormatter.success("Model exists on HuggingFace Hub"))

        except Exception as e:
            logger.error(LogFormatter.error("Model validation failed", e))
            raise

        # Update model revision with commit sha
        model_data["revision"] = model_info.sha

        # Check if model already exists in the system
        try:
            logger.info(LogFormatter.subsection("CHECKING EXISTING SUBMISSIONS"))
            existing_models = await self.get_models()

            # Call the official provider status check
            is_valid, error_message = await self.validator.check_official_provider_status(
                model_data["model_id"],
                existing_models
            )
            if not is_valid:
                raise ValueError(error_message)

            # Check in all statuses (pending, evaluating, finished)
            for status, models in existing_models.items():
                for model in models:
                    if model["name"] == model_data["model_id"] and model["revision"] == model_data["revision"]:
                        error_msg = f"Model {model_data['model_id']} revision {model_data['revision']} is already in the system with status: {status}"
                        logger.error(LogFormatter.error("Submission rejected", error_msg))
                        raise ValueError(error_msg)

            logger.info(LogFormatter.success("No existing submission found"))
        except ValueError:
            raise
        except Exception as e:
            logger.error(LogFormatter.error("Failed to check existing submissions", e))
            raise

        # Check that model on hub and valid
        valid, error, model_config = await self.validator.is_model_on_hub(
            model_data["model_id"],
            model_data["revision"],
            test_tokenizer=True
        )
        if not valid:
            logger.error(LogFormatter.error("Model on hub validation failed", error))
            raise Exception(error)
        logger.info(LogFormatter.success("Model on hub validation passed"))

        # Validate model card
        valid, error, model_card = await self.validator.check_model_card(
            model_data["model_id"]
        )
        if not valid:
            logger.error(LogFormatter.error("Model card validation failed", error))
            raise Exception(error)
        logger.info(LogFormatter.success("Model card validation passed"))

        # Check size limits
        model_size, error = await self.validator.get_model_size(
            model_info,
            model_data["precision"],
            model_data["base_model"],
            revision=model_data["revision"]
        )
        if model_size is None:
            logger.error(LogFormatter.error("Model size validation failed", error))
            raise Exception(error)
        logger.info(LogFormatter.success(f"Model size validation passed: {model_size:.1f}B"))

        # Size limits based on precision
        if model_data["precision"] in ["float16", "bfloat16"] and model_size > 100:
            error_msg = f"Model too large for {model_data['precision']} (limit: 100B)"
            logger.error(LogFormatter.error("Size limit exceeded", error_msg))
            raise Exception(error_msg)

        # Chat template validation if requested
        if model_data["use_chat_template"]:
            valid, error = await self.validator.check_chat_template(
                model_data["model_id"],
                model_data["revision"]
            )
            if not valid:
                logger.error(LogFormatter.error("Chat template validation failed", error))
                raise Exception(error)
            logger.info(LogFormatter.success("Chat template validation passed"))


        architectures = model_info.config.get("architectures", "")
        if architectures:
            architectures = ";".join(architectures)

        # Create eval entry
        eval_entry = {
            "model": model_data["model_id"],
            "base_model": model_data["base_model"],
            "revision": model_info.sha,
            "precision": model_data["precision"],
            "params": model_size,
            "architectures": architectures,
            "weight_type": model_data["weight_type"],
            "status": "PENDING",
            "submitted_time": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "model_type": model_data["model_type"],
            "job_id": -1,
            "job_start_time": None,
            "use_chat_template": model_data["use_chat_template"],
            "sender": user_id
        }

        logger.info(LogFormatter.subsection("EVALUATION ENTRY"))
        for line in LogFormatter.tree(eval_entry):
            logger.info(line)

        # Upload to HF dataset
        try:
            logger.info(LogFormatter.subsection("UPLOADING TO HUGGINGFACE"))
            logger.info(LogFormatter.info(f"Uploading to {HF_ORGANIZATION}/requests..."))

            # Construct the path in the dataset
            org_or_user = model_data["model_id"].split("/")[0] if "/" in model_data["model_id"] else ""
            model_path = model_data["model_id"].split("/")[-1]
            relative_path = f"{org_or_user}/{model_path}_eval_request_False_{model_data['precision']}_{model_data['weight_type']}.json"

            # Create a temporary file with the request
            with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
                json.dump(eval_entry, temp_file, indent=2)
                temp_file.flush()
                temp_path = temp_file.name

            # Upload file directly
            self.hf_api.upload_file(
                path_or_fileobj=temp_path,
                path_in_repo=relative_path,
                repo_id=f"{HF_ORGANIZATION}/requests",
                repo_type="dataset",
                commit_message=f"Add {model_data['model_id']} to eval queue",
                token=self.token
            )

            # Clean up temp file
            os.unlink(temp_path)

            logger.info(LogFormatter.success("Upload successful"))

        except Exception as e:
            logger.error(LogFormatter.error("Upload failed", e))
            raise

        # Add automatic vote
        try:
            logger.info(LogFormatter.subsection("AUTOMATIC VOTE"))
            logger.info(LogFormatter.info(f"Adding upvote for {model_data['model_id']} by {user_id}"))
            await self.vote_service.add_vote(
                model_data["model_id"],
                user_id,
                "up"
            )
            logger.info(LogFormatter.success("Vote recorded successfully"))
        except Exception as e:
            logger.error(LogFormatter.error("Failed to record vote", e))
            # Don't raise here as the main submission was successful

        return {
            "status": "success",
            "message": "The model was submitted successfully, and the vote has been recorded"
        }

    async def get_model_status(self, model_id: str) -> Dict[str, Any]:
        """Get evaluation status of a model"""
        logger.info(LogFormatter.info(f"Checking status for model: {model_id}"))
        eval_path = self.eval_requests_path

        for user_folder in eval_path.iterdir():
            if user_folder.is_dir():
                for file in user_folder.glob("*.json"):
                    with open(file, "r") as f:
                        data = json.load(f)
                        if data["model"] == model_id:
                            status = {
                                "status": data["status"],
                                "submitted_time": data["submitted_time"],
                                "job_id": data.get("job_id", -1)
                            }
                            logger.info(LogFormatter.success("Status found"))
                            for line in LogFormatter.tree(status, "Model Status"):
                                logger.info(line)
                            return status

        logger.warning(LogFormatter.warning(f"No status found for model: {model_id}"))
        return {"status": "not_found"}

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
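One detail worth flagging in the upload step above: the request filename encodes the submission parameters directly in the repo path. A minimal standalone sketch of that convention, where the model id, precision, and weight type are hypothetical placeholders:

# Standalone sketch of the eval-request path convention built above.
# model_id, precision, and weight_type are hypothetical placeholders.
model_id = "example-org/example-model-7b"
precision = "bfloat16"
weight_type = "Original"

org_or_user = model_id.split("/")[0] if "/" in model_id else ""
model_path = model_id.split("/")[-1]
relative_path = f"{org_or_user}/{model_path}_eval_request_False_{precision}_{weight_type}.json"

print(relative_path)
# example-org/example-model-7b_eval_request_False_bfloat16_Original.json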
backend/app/services/rate_limiter.py
DELETED
@@ -1,72 +0,0 @@
"""
import logging
from datetime import datetime, timedelta, timezone
from typing import Tuple, Dict, List

logger = logging.getLogger(__name__)

class RateLimiter:
    def __init__(self, period_days: int = 7, quota: int = 5):
        self.period_days = period_days
        self.quota = quota
        self.submission_history: Dict[str, List[datetime]] = {}
        self.higher_quota_users = set()  # Users with higher quotas
        self.unlimited_users = set()  # Users with no quota limits

    def add_unlimited_user(self, user_id: str):
        """Add a user to the unlimited users list"""
        self.unlimited_users.add(user_id)

    def add_higher_quota_user(self, user_id: str):
        """Add a user to the higher quota users list"""
        self.higher_quota_users.add(user_id)

    def record_submission(self, user_id: str):
        """Record a new submission for a user"""
        current_time = datetime.now(timezone.utc)
        if user_id not in self.submission_history:
            self.submission_history[user_id] = []
        self.submission_history[user_id].append(current_time)

    def clean_old_submissions(self, user_id: str):
        """Remove submissions older than the period"""
        if user_id not in self.submission_history:
            return

        current_time = datetime.now(timezone.utc)
        cutoff_time = current_time - timedelta(days=self.period_days)

        self.submission_history[user_id] = [
            time for time in self.submission_history[user_id]
            if time > cutoff_time
        ]

    async def check_rate_limit(self, user_id: str) -> Tuple[bool, str]:
        """Check if a user has exceeded their rate limit

        Returns:
            Tuple[bool, str]: (is_allowed, error_message)
        """
        # Unlimited users bypass all checks
        if user_id in self.unlimited_users:
            return True, ""

        # Clean old submissions
        self.clean_old_submissions(user_id)

        # Get current submission count
        submission_count = len(self.submission_history.get(user_id, []))

        # Calculate user's quota
        user_quota = self.quota * 2 if user_id in self.higher_quota_users else self.quota

        # Check if user has exceeded their quota
        if submission_count >= user_quota:
            error_msg = (
                f"User '{user_id}' has reached the limit of {user_quota} submissions "
                f"in the last {self.period_days} days. Please wait before submitting again."
            )
            return False, error_msg

        return True, ""
"""
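Note that the entire module above ships disabled, wrapped in a module-level string. If it were re-enabled, a minimal usage sketch could look like the following; the import path mirrors the file's location and the user id is a hypothetical placeholder:

import asyncio
# Assumes the RateLimiter module above has been uncommented and is importable.
from app.services.rate_limiter import RateLimiter

async def demo():
    limiter = RateLimiter(period_days=7, quota=5)
    for _ in range(6):
        allowed, error = await limiter.check_rate_limit("example-user")
        if not allowed:
            print(error)  # fires on the 6th attempt, once the quota of 5 is spent
            break
        limiter.record_submission("example-user")

asyncio.run(demo())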
backend/app/services/votes.py
DELETED
@@ -1,390 +0,0 @@
from datetime import datetime, timezone
from typing import Dict, Any, List, Set, Tuple, Optional
import json
import logging
import asyncio
from pathlib import Path
import aiohttp
from huggingface_hub import HfApi
import datasets

from app.services.hf_service import HuggingFaceService
from app.config import HF_TOKEN
from app.config.hf_config import HF_ORGANIZATION
from app.core.cache import cache_config
from app.core.formatting import LogFormatter

logger = logging.getLogger(__name__)

class VoteService(HuggingFaceService):
    _instance: Optional['VoteService'] = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(VoteService, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        if not hasattr(self, '_init_done'):
            super().__init__()
            self.votes_file = cache_config.votes_file
            self.votes_to_upload: List[Dict[str, Any]] = []
            self.vote_check_set: Set[Tuple[str, str, str]] = set()
            self._votes_by_model: Dict[str, List[Dict[str, Any]]] = {}
            self._votes_by_user: Dict[str, List[Dict[str, Any]]] = {}
            self._upload_lock = asyncio.Lock()
            self._last_sync = None
            self._sync_interval = 300  # 5 minutes
            self._total_votes = 0
            self._last_vote_timestamp = None
            self._max_retries = 3
            self._retry_delay = 1  # seconds
            self._upload_batch_size = 10
            self.hf_api = HfApi(token=HF_TOKEN)
            self._init_done = True

    async def initialize(self):
        """Initialize the vote service"""
        if self._initialized:
            await self._check_for_new_votes()
            return

        try:
            logger.info(LogFormatter.section("VOTE SERVICE INITIALIZATION"))

            # Ensure votes directory exists
            self.votes_file.parent.mkdir(parents=True, exist_ok=True)

            # Load existing votes if file exists
            local_vote_count = 0
            if self.votes_file.exists():
                logger.info(LogFormatter.info(f"Loading votes from {self.votes_file}"))
                local_vote_count = await self._count_local_votes()
                logger.info(LogFormatter.info(f"Found {local_vote_count:,} local votes"))

            # Check remote votes count
            remote_vote_count = await self._count_remote_votes()
            logger.info(LogFormatter.info(f"Found {remote_vote_count:,} remote votes"))

            if remote_vote_count > local_vote_count:
                logger.info(LogFormatter.info(f"Fetching {remote_vote_count - local_vote_count:,} new votes"))
                await self._sync_with_hub()
            elif remote_vote_count < local_vote_count:
                logger.warning(LogFormatter.warning(f"Local votes ({local_vote_count:,}) > Remote votes ({remote_vote_count:,})"))
                await self._load_existing_votes()
            else:
                logger.info(LogFormatter.success("Local and remote votes are in sync"))
                if local_vote_count > 0:
                    await self._load_existing_votes()
                else:
                    logger.info(LogFormatter.info("No votes found"))

            self._initialized = True
            self._last_sync = datetime.now(timezone.utc)

            # Final summary
            stats = {
                "Total_Votes": self._total_votes,
                "Last_Sync": self._last_sync.strftime("%Y-%m-%d %H:%M:%S UTC")
            }
            logger.info(LogFormatter.section("INITIALIZATION COMPLETE"))
            for line in LogFormatter.stats(stats):
                logger.info(line)

        except Exception as e:
            logger.error(LogFormatter.error("Initialization failed", e))
            raise

    async def _count_local_votes(self) -> int:
        """Count votes in local file"""
        if not self.votes_file.exists():
            return 0

        count = 0
        try:
            with open(self.votes_file, 'r') as f:
                for _ in f:
                    count += 1
            return count
        except Exception as e:
            logger.error(f"Error counting local votes: {str(e)}")
            return 0

    async def _count_remote_votes(self) -> int:
        """Count votes in remote file"""
        url = f"https://huggingface.co/datasets/{HF_ORGANIZATION}/votes/raw/main/votes_data.jsonl"
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=headers) as response:
                    if response.status == 200:
                        count = 0
                        async for line in response.content:
                            if line.strip():  # Skip empty lines
                                count += 1
                        return count
                    else:
                        logger.error(f"Failed to get remote votes: HTTP {response.status}")
                        return 0
        except Exception as e:
            logger.error(f"Error counting remote votes: {str(e)}")
            return 0

    async def _sync_with_hub(self):
        """Sync votes with HuggingFace hub using datasets"""
        try:
            logger.info(LogFormatter.section("VOTE SYNC"))
            self._log_repo_operation("sync", f"{HF_ORGANIZATION}/votes", "Syncing local votes with HF hub")
            logger.info(LogFormatter.info("Syncing with HuggingFace hub..."))

            # Load votes from HF dataset
            dataset = datasets.load_dataset(
                f"{HF_ORGANIZATION}/votes",
                split="train",
                cache_dir=cache_config.get_cache_path("datasets")
            )

            remote_votes = len(dataset)
            logger.info(LogFormatter.info(f"Dataset loaded with {remote_votes:,} votes"))

            # Convert to list of dictionaries
            df = dataset.to_pandas()
            if 'timestamp' in df.columns:
                df['timestamp'] = df['timestamp'].dt.strftime('%Y-%m-%dT%H:%M:%SZ')
            remote_votes = df.to_dict('records')

            # If we have more remote votes than local
            if len(remote_votes) > self._total_votes:
                new_votes = len(remote_votes) - self._total_votes
                logger.info(LogFormatter.info(f"Processing {new_votes:,} new votes..."))

                # Save votes to local file
                with open(self.votes_file, 'w') as f:
                    for vote in remote_votes:
                        f.write(json.dumps(vote) + '\n')

                # Reload votes in memory
                await self._load_existing_votes()
                logger.info(LogFormatter.success("Sync completed successfully"))
            else:
                logger.info(LogFormatter.success("Local votes are up to date"))

            self._last_sync = datetime.now(timezone.utc)

        except Exception as e:
            logger.error(LogFormatter.error("Sync failed", e))
            raise

    async def _check_for_new_votes(self):
        """Check for new votes on the hub"""
        try:
            self._log_repo_operation("check", f"{HF_ORGANIZATION}/votes", "Checking for new votes")
            # Load only dataset metadata
            dataset_info = datasets.load_dataset(f"{HF_ORGANIZATION}/votes", split="train")
            remote_vote_count = len(dataset_info)

            if remote_vote_count > self._total_votes:
                logger.info(f"Found {remote_vote_count - self._total_votes} new votes on hub")
                await self._sync_with_hub()
            else:
                logger.info("No new votes found on hub")

        except Exception as e:
            logger.error(f"Error checking for new votes: {str(e)}")

    async def _load_existing_votes(self):
        """Load existing votes from file"""
        if not self.votes_file.exists():
            logger.warning(LogFormatter.warning("No votes file found"))
            return

        try:
            logger.info(LogFormatter.section("LOADING VOTES"))

            # Clear existing data structures
            self.vote_check_set.clear()
            self._votes_by_model.clear()
            self._votes_by_user.clear()

            vote_count = 0
            latest_timestamp = None

            with open(self.votes_file, "r") as f:
                for line in f:
                    try:
                        vote = json.loads(line.strip())
                        vote_count += 1

                        # Track latest timestamp
                        try:
                            vote_timestamp = datetime.fromisoformat(vote["timestamp"].replace("Z", "+00:00"))
                            if not latest_timestamp or vote_timestamp > latest_timestamp:
                                latest_timestamp = vote_timestamp
                            vote["timestamp"] = vote_timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
                        except (KeyError, ValueError) as e:
                            logger.warning(LogFormatter.warning(f"Invalid timestamp in vote: {str(e)}"))
                            continue

                        if vote_count % 1000 == 0:
                            logger.info(LogFormatter.info(f"Processed {vote_count:,} votes..."))

                        self._add_vote_to_memory(vote)

                    except json.JSONDecodeError as e:
                        logger.error(LogFormatter.error("Vote parsing failed", e))
                        continue
                    except Exception as e:
                        logger.error(LogFormatter.error("Vote processing failed", e))
                        continue

            self._total_votes = vote_count
            self._last_vote_timestamp = latest_timestamp

            # Final summary
            stats = {
                "Total_Votes": vote_count,
                "Latest_Vote": latest_timestamp.strftime("%Y-%m-%d %H:%M:%S UTC") if latest_timestamp else "None",
                "Unique_Models": len(self._votes_by_model),
                "Unique_Users": len(self._votes_by_user)
            }

            logger.info(LogFormatter.section("VOTE SUMMARY"))
            for line in LogFormatter.stats(stats):
                logger.info(line)

        except Exception as e:
            logger.error(LogFormatter.error("Failed to load votes", e))
            raise

    def _add_vote_to_memory(self, vote: Dict[str, Any]):
        """Add vote to memory structures"""
        try:
            check_tuple = (vote["model"], vote["revision"], vote["username"])

            # Skip if we already have this vote
            if check_tuple in self.vote_check_set:
                return

            self.vote_check_set.add(check_tuple)

            # Update model votes
            if vote["model"] not in self._votes_by_model:
                self._votes_by_model[vote["model"]] = []
            self._votes_by_model[vote["model"]].append(vote)

            # Update user votes
            if vote["username"] not in self._votes_by_user:
                self._votes_by_user[vote["username"]] = []
            self._votes_by_user[vote["username"]].append(vote)

        except KeyError as e:
            logger.error(f"Malformed vote data, missing key: {str(e)}")
        except Exception as e:
            logger.error(f"Error adding vote to memory: {str(e)}")

    async def get_user_votes(self, user_id: str) -> List[Dict[str, Any]]:
        """Get all votes from a specific user"""
        logger.info(LogFormatter.info(f"Fetching votes for user: {user_id}"))
        votes = self._votes_by_user.get(user_id, [])
        logger.info(LogFormatter.success(f"Found {len(votes):,} votes"))
        return votes

    async def get_model_votes(self, model_id: str) -> Dict[str, Any]:
        """Get all votes for a specific model"""
        logger.info(LogFormatter.info(f"Fetching votes for model: {model_id}"))
        votes = self._votes_by_model.get(model_id, [])

        # Group votes by revision
        votes_by_revision = {}
        for vote in votes:
            revision = vote["revision"]
            if revision not in votes_by_revision:
                votes_by_revision[revision] = 0
            votes_by_revision[revision] += 1

        stats = {
            "Total_Votes": len(votes),
            **{f"Revision_{k}": v for k, v in votes_by_revision.items()}
        }

        logger.info(LogFormatter.section("VOTE STATISTICS"))
        for line in LogFormatter.stats(stats):
            logger.info(line)

        return {
            "total_votes": len(votes),
            "votes_by_revision": votes_by_revision,
            "votes": votes
        }

    async def _get_model_revision(self, model_id: str) -> str:
        """Get current revision of a model with retries"""
        logger.info(f"Getting revision for model: {model_id}")
        for attempt in range(self._max_retries):
            try:
                model_info = await asyncio.to_thread(self.hf_api.model_info, model_id)
                logger.info(f"Successfully got revision {model_info.sha} for model {model_id}")
                return model_info.sha
            except Exception as e:
                logger.error(f"Error getting model revision for {model_id} (attempt {attempt + 1}): {str(e)}")
                if attempt < self._max_retries - 1:
                    retry_delay = self._retry_delay * (attempt + 1)
                    logger.info(f"Retrying in {retry_delay} seconds...")
                    await asyncio.sleep(retry_delay)
                else:
                    logger.warning(f"Using 'main' as fallback revision for {model_id} after {self._max_retries} failed attempts")
                    return "main"

    async def add_vote(self, model_id: str, user_id: str, vote_type: str) -> Dict[str, Any]:
        """Add a vote for a model"""
        try:
            self._log_repo_operation("add", f"{HF_ORGANIZATION}/votes", f"Adding {vote_type} vote for {model_id} by {user_id}")
            logger.info(LogFormatter.section("NEW VOTE"))
            stats = {
                "Model": model_id,
                "User": user_id,
                "Type": vote_type
            }
            for line in LogFormatter.tree(stats, "Vote Details"):
                logger.info(line)

            revision = await self._get_model_revision(model_id)
            check_tuple = (model_id, revision, user_id)

            if check_tuple in self.vote_check_set:
                raise ValueError("Vote already recorded for this model")

            vote = {
                "model": model_id,
                "revision": revision,
                "username": user_id,
                "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
                "vote_type": vote_type
            }

            # Update local storage
            with open(self.votes_file, "a") as f:
                f.write(json.dumps(vote) + "\n")

            self._add_vote_to_memory(vote)
            self.votes_to_upload.append(vote)

            stats = {
                "Status": "Success",
                "Queue_Size": len(self.votes_to_upload)
            }
            for line in LogFormatter.stats(stats):
                logger.info(line)

            # Try to upload if batch size reached
            if len(self.votes_to_upload) >= self._upload_batch_size:
                logger.info(LogFormatter.info(f"Upload batch size reached ({self._upload_batch_size}), triggering sync"))
                await self._sync_with_hub()

            return {"status": "success", "message": "Vote added successfully"}

        except Exception as e:
            logger.error(LogFormatter.error("Failed to add vote", e))
            raise
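The deduplication above keys every vote on a (model, revision, username) tuple, so a user can vote once per model revision. A self-contained sketch of that invariant, with all values hypothetical:

# Standalone sketch of the vote deduplication invariant used by
# VoteService._add_vote_to_memory. All values are hypothetical.
vote_check_set = set()

def add_vote(model: str, revision: str, username: str) -> bool:
    """Return True if the vote is new, False if it is a duplicate."""
    check_tuple = (model, revision, username)
    if check_tuple in vote_check_set:
        return False
    vote_check_set.add(check_tuple)
    return True

print(add_vote("example-org/example-model", "abc123", "alice"))  # True
print(add_vote("example-org/example-model", "abc123", "alice"))  # False (duplicate)
print(add_vote("example-org/example-model", "def456", "alice"))  # True (new revision)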
backend/app/utils/__init__.py
DELETED
@@ -1,3 +0,0 @@
from . import model_validation

__all__ = ["model_validation"]
backend/app/utils/logging.py
DELETED
@@ -1,3 +0,0 @@
from app.core.formatting import LogFormatter

__all__ = ['LogFormatter']
backend/app/utils/model_validation.py
DELETED
@@ -1,266 +0,0 @@
import json
import logging
import asyncio
from typing import Tuple, Optional, Dict, Any
from datasets import load_dataset
from huggingface_hub import HfApi, ModelCard, hf_hub_download
from huggingface_hub import hf_api
from transformers import AutoConfig, AutoTokenizer
from app.config.base import HF_TOKEN
from app.config.hf_config import OFFICIAL_PROVIDERS_REPO
from app.core.formatting import LogFormatter

logger = logging.getLogger(__name__)

class ModelValidator:
    def __init__(self):
        self.token = HF_TOKEN
        self.api = HfApi(token=self.token)
        self.headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}

    async def check_model_card(self, model_id: str) -> Tuple[bool, str, Optional[Dict[str, Any]]]:
        """Check if model has a valid model card"""
        try:
            logger.info(LogFormatter.info(f"Checking model card for {model_id}"))

            # Get model card content using ModelCard.load
            try:
                model_card = await asyncio.to_thread(
                    ModelCard.load,
                    model_id
                )
                logger.info(LogFormatter.success("Model card found"))
            except Exception as e:
                error_msg = "Please add a model card to your model to explain how you trained/fine-tuned it."
                logger.error(LogFormatter.error(error_msg, e))
                return False, error_msg, None

            # Check license in model card data
            if model_card.data.license is None and not ("license_name" in model_card.data and "license_link" in model_card.data):
                error_msg = "License not found. Please add a license to your model card using the `license` metadata or a `license_name`/`license_link` pair."
                logger.warning(LogFormatter.warning(error_msg))
                return False, error_msg, None

            # Enforce card content length
            if len(model_card.text) < 200:
                error_msg = "Please add a description to your model card, it is too short."
                logger.warning(LogFormatter.warning(error_msg))
                return False, error_msg, None

            logger.info(LogFormatter.success("Model card validation passed"))
            return True, "", model_card

        except Exception as e:
            error_msg = "Failed to validate model card"
            logger.error(LogFormatter.error(error_msg, e))
            return False, str(e), None

    async def get_safetensors_metadata(self, model_id: str, is_adapter: bool = False, revision: str = "main") -> Optional[Dict]:
        """Get metadata from a safetensors file"""
        try:
            if is_adapter:
                metadata = await asyncio.to_thread(
                    hf_api.parse_safetensors_file_metadata,
                    model_id,
                    "adapter_model.safetensors",
                    token=self.token,
                    revision=revision,
                )
            else:
                metadata = await asyncio.to_thread(
                    hf_api.get_safetensors_metadata,
                    repo_id=model_id,
                    token=self.token,
                    revision=revision,
                )
            return metadata

        except Exception as e:
            logger.error(f"Failed to get safetensors metadata: {str(e)}")
            return None

    async def get_model_size(
        self,
        model_info: Any,
        precision: str,
        base_model: str,
        revision: str
    ) -> Tuple[Optional[float], Optional[str]]:
        """Get model size in billions of parameters"""
        try:
            logger.info(LogFormatter.info(f"Checking model size for {model_info.modelId}"))

            # Check if model is adapter
            is_adapter = any(s.rfilename == "adapter_config.json" for s in model_info.siblings if hasattr(s, 'rfilename'))

            # Try to get size from safetensors first
            model_size = None

            if is_adapter and base_model:
                # For adapters, we need both adapter and base model sizes
                adapter_meta = await self.get_safetensors_metadata(model_info.id, is_adapter=True, revision=revision)
                base_meta = await self.get_safetensors_metadata(base_model, revision="main")

                if adapter_meta and base_meta:
                    adapter_size = sum(adapter_meta.parameter_count.values())
                    base_size = sum(base_meta.parameter_count.values())
                    model_size = adapter_size + base_size
            else:
                # For regular models, just get the model size
                meta = await self.get_safetensors_metadata(model_info.id, revision=revision)
                if meta:
                    model_size = sum(meta.parameter_count.values())  # total params

            if model_size is None:
                # If model size could not be determined, return an error
                return None, "Model size could not be determined"

            # Adjust size for GPTQ models
            size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
            model_size = model_size / 1e9  # Convert to billions, assuming float16
            model_size = round(size_factor * model_size, 3)

            logger.info(LogFormatter.success(f"Model size: {model_size}B parameters"))
            return model_size, None

        except Exception as e:
            logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
            return None, str(e)

    async def check_chat_template(
        self,
        model_id: str,
        revision: str
    ) -> Tuple[bool, Optional[str]]:
        """Check if model has a valid chat template"""
        try:
            logger.info(LogFormatter.info(f"Checking chat template for {model_id}"))

            try:
                config_file = await asyncio.to_thread(
                    hf_hub_download,
                    repo_id=model_id,
                    filename="tokenizer_config.json",
                    revision=revision,
                    repo_type="model"
                )

                with open(config_file, 'r') as f:
                    tokenizer_config = json.load(f)

                if 'chat_template' not in tokenizer_config:
                    error_msg = f"The model {model_id} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
                    logger.error(LogFormatter.error(error_msg))
                    return False, error_msg

                logger.info(LogFormatter.success("Valid chat template found"))
                return True, None

            except Exception as e:
                error_msg = f"Error checking chat_template: {str(e)}"
                logger.error(LogFormatter.error(error_msg))
                return False, error_msg

        except Exception as e:
            error_msg = "Failed to check chat template"
            logger.error(LogFormatter.error(error_msg, e))
            return False, str(e)

    async def is_model_on_hub(
        self,
        model_name: str,
        revision: str,
        test_tokenizer: bool = False,
        trust_remote_code: bool = False
    ) -> Tuple[bool, Optional[str], Optional[Any]]:
        """Check if model exists and is properly configured on the Hub"""
        try:
            config = await asyncio.to_thread(
                AutoConfig.from_pretrained,
                model_name,
                revision=revision,
                trust_remote_code=trust_remote_code,
                token=self.token,
                force_download=True
            )

            if test_tokenizer:
                try:
                    await asyncio.to_thread(
                        AutoTokenizer.from_pretrained,
                        model_name,
                        revision=revision,
                        trust_remote_code=trust_remote_code,
                        token=self.token
                    )
                except ValueError as e:
                    return False, f"The tokenizer is not available in an official Transformers release: {e}", None
                except Exception:
                    return False, "The tokenizer cannot be loaded. Ensure the tokenizer class is part of a stable Transformers release and correctly configured.", None

            return True, None, config

        except ValueError:
            return False, "The model requires `trust_remote_code=True` to launch, and for safety reasons, we don't accept such models automatically.", None
        except Exception as e:
            if "You are trying to access a gated repo." in str(e):
                return True, "The model is gated and requires special access permissions.", None
            return False, f"The model was not found or is misconfigured on the Hub. Error: {e.args[0]}", None

    async def check_official_provider_status(
        self,
        model_id: str,
        existing_models: Dict[str, list]
    ) -> Tuple[bool, Optional[str]]:
        """
        Check if model is from official provider and has finished submission.

        Args:
            model_id: The model identifier (org/model-name)
            existing_models: Dictionary of models by status from get_models()

        Returns:
            Tuple[bool, Optional[str]]: (is_valid, error_message)
        """
        try:
            logger.info(LogFormatter.info(f"Checking official provider status for {model_id}"))

            # Get model organization
            model_org = model_id.split('/')[0] if '/' in model_id else None

            if not model_org:
                return True, None

            # Load official providers dataset
            dataset = load_dataset(OFFICIAL_PROVIDERS_REPO)
            official_providers = dataset["train"][0]["CURATED_SET"]

            # Check if model org is in official providers
            is_official = model_org in official_providers

            if is_official:
                logger.info(LogFormatter.info(f"Model organization '{model_org}' is an official provider"))

                # Check for finished submissions
                if "finished" in existing_models:
                    for model in existing_models["finished"]:
                        if model["name"] == model_id:
                            error_msg = (
                                f"Model {model_id} is an official provider model "
                                f"with a completed evaluation. "
                                f"To re-evaluate, please open a discussion."
                            )
                            logger.error(LogFormatter.error("Validation failed", error_msg))
                            return False, error_msg

                logger.info(LogFormatter.success("No finished submission found for this official provider model"))
            else:
                logger.info(LogFormatter.info(f"Model organization '{model_org}' is not an official provider"))

            return True, None

        except Exception as e:
            error_msg = f"Failed to check official provider status: {str(e)}"
            logger.error(LogFormatter.error(error_msg))
            return False, error_msg
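The size logic in get_model_size reduces to a small amount of arithmetic: sum the safetensors parameter counts, divide by 1e9, and apply the 8x correction the code above uses for GPTQ checkpoints. A worked sketch with a hypothetical parameter count:

# Worked sketch of the arithmetic in get_model_size.
# The parameter count below is a hypothetical example.
param_count = 7_241_732_096  # summed from safetensors parameter_count metadata
precision = "GPTQ"

# The code above multiplies GPTQ sizes by 8; every other precision uses 1.
size_factor = 8 if precision == "GPTQ" else 1
model_size_b = round(size_factor * (param_count / 1e9), 3)

print(model_size_b)  # 57.934 with GPTQ, 7.242 with a dense precision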
backend/pyproject.toml
DELETED
@@ -1,31 +0,0 @@
[tool.poetry]
name = "llm-leaderboard-backend"
version = "0.1.0"
description = "Backend for the Open LLM Leaderboard"
authors = ["Your Name <your.email@example.com>"]

[tool.poetry.dependencies]
python = "^3.12"
fastapi = "^0.115.6"
uvicorn = {extras = ["standard"], version = "^0.34.0"}
numpy = "^2.2.0"
pandas = "^2.2.3"
datasets = "^3.2.0"
pyarrow = "^18.1.0"
python-multipart = "^0.0.20"
huggingface-hub = "^0.27.1"
transformers = "4.48.0"
safetensors = "^0.4.5"
aiofiles = "^24.1.0"
fastapi-cache2 = "^0.2.1"
python-dotenv = "^1.0.1"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.4"
black = "^24.10.0"
isort = "^5.13.2"
flake8 = "^6.1.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
backend/utils/analyze_prod_datasets.py
DELETED
@@ -1,170 +0,0 @@
import os
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, List
from huggingface_hub import HfApi
from dotenv import load_dotenv
from app.config.hf_config import HF_ORGANIZATION

# Get the backend directory path
BACKEND_DIR = Path(__file__).parent.parent
ROOT_DIR = BACKEND_DIR.parent

# Load environment variables from .env file in root directory
load_dotenv(ROOT_DIR / ".env")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Hugging Face API
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
api = HfApi(token=HF_TOKEN)

def analyze_dataset(repo_id: str) -> Dict[str, Any]:
    """Analyze a dataset and return statistics"""
    try:
        # Get dataset info
        dataset_info = api.dataset_info(repo_id=repo_id)

        # Get file list
        files = api.list_repo_files(repo_id, repo_type="dataset")

        # Get last commit info (list_repo_commits returns a list)
        commits = api.list_repo_commits(repo_id, repo_type="dataset")
        last_commit = next(iter(commits), None)

        # Count lines in jsonl files
        total_entries = 0
        for file in files:
            if file.endswith('.jsonl'):
                try:
                    # Download file content
                    content = api.hf_hub_download(
                        repo_id=repo_id,
                        filename=file,
                        repo_type="dataset"
                    )

                    # Count lines
                    with open(content, 'r') as f:
                        for _ in f:
                            total_entries += 1

                except Exception as e:
                    logger.error(f"Error processing file {file}: {str(e)}")
                    continue

        # Special handling for requests dataset
        if repo_id == f"{HF_ORGANIZATION}/requests":
            pending_count = 0
            completed_count = 0

            try:
                content = api.hf_hub_download(
                    repo_id=repo_id,
                    filename="eval_requests.jsonl",
                    repo_type="dataset"
                )

                with open(content, 'r') as f:
                    for line in f:
                        try:
                            entry = json.loads(line)
                            if entry.get("status") == "pending":
                                pending_count += 1
                            elif entry.get("status") == "completed":
                                completed_count += 1
                        except json.JSONDecodeError:
                            continue

            except Exception as e:
                logger.error(f"Error analyzing requests: {str(e)}")

        # Build response (store last_modified as an ISO string so main() can parse it)
        response = {
            "id": repo_id,
            "last_modified": last_commit.created_at.isoformat() if last_commit else None,
            "total_entries": total_entries,
            "file_count": len(files),
            "size_bytes": dataset_info.size_in_bytes,
            "downloads": dataset_info.downloads,
        }

        # Add request-specific info if applicable
        if repo_id == f"{HF_ORGANIZATION}/requests":
            response.update({
                "pending_requests": pending_count,
                "completed_requests": completed_count
            })

        return response

    except Exception as e:
        logger.error(f"Error analyzing dataset {repo_id}: {str(e)}")
        return {
            "id": repo_id,
            "error": str(e)
        }

def main():
    """Main function to analyze all datasets"""
    try:
        # List of datasets to analyze
        datasets = [
            {
                "id": f"{HF_ORGANIZATION}/contents",
                "description": "Aggregated results"
            },
            {
                "id": f"{HF_ORGANIZATION}/requests",
                "description": "Evaluation requests"
            },
            {
                "id": f"{HF_ORGANIZATION}/votes",
                "description": "User votes"
            },
            {
                "id": f"{HF_ORGANIZATION}/official-providers",
                "description": "Highlighted models"
            }
        ]

        # Analyze each dataset
        results = []
        for dataset in datasets:
            logger.info(f"\nAnalyzing {dataset['description']} ({dataset['id']})...")
            result = analyze_dataset(dataset['id'])
            results.append(result)

            if 'error' in result:
                logger.error(f"❌ Error: {result['error']}")
            else:
                logger.info(f"✓ {result['total_entries']} entries")
                logger.info(f"✓ {result['file_count']} files")
                logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
                logger.info(f"✓ {result['downloads']} downloads")

                if 'pending_requests' in result:
                    logger.info(f"✓ {result['pending_requests']} pending requests")
                    logger.info(f"✓ {result['completed_requests']} completed requests")

                if result['last_modified']:
                    last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
                    logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")

        return results

    except Exception as e:
        logger.error(f"Global error: {str(e)}")
        return []

if __name__ == "__main__":
    main()
backend/utils/analyze_prod_models.py
DELETED
@@ -1,106 +0,0 @@
import os
import json
import logging
from datetime import datetime
from pathlib import Path
from huggingface_hub import HfApi
from dotenv import load_dotenv
from app.config.hf_config import HF_ORGANIZATION

# Get the backend directory path
BACKEND_DIR = Path(__file__).parent.parent
ROOT_DIR = BACKEND_DIR.parent

# Load environment variables from .env file in root directory
load_dotenv(ROOT_DIR / ".env")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Hugging Face API
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
api = HfApi(token=HF_TOKEN)

def count_evaluated_models():
    """Count the number of evaluated models"""
    try:
        # Get dataset info
        dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents", repo_type="dataset")

        # Get file list
        files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset")

        # Get last commit info (list_repo_commits returns a list)
        commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
        last_commit = next(iter(commits), None)

        # Count lines in jsonl files
        total_entries = 0
        for file in files:
            if file.endswith('.jsonl'):
                try:
                    # Download file content
                    content = api.hf_hub_download(
                        repo_id=f"{HF_ORGANIZATION}/contents",
                        filename=file,
                        repo_type="dataset"
                    )

                    # Count lines
                    with open(content, 'r') as f:
                        for _ in f:
                            total_entries += 1

                except Exception as e:
                    logger.error(f"Error processing file {file}: {str(e)}")
                    continue

        # Build response (store last_modified as an ISO string so main() can parse it)
        response = {
            "total_models": total_entries,
            "last_modified": last_commit.created_at.isoformat() if last_commit else None,
            "file_count": len(files),
            "size_bytes": dataset_info.size_in_bytes,
            "downloads": dataset_info.downloads
        }

        return response

    except Exception as e:
        logger.error(f"Error counting evaluated models: {str(e)}")
        return {
            "error": str(e)
        }

def main():
    """Main function to count evaluated models"""
    try:
        logger.info("\nAnalyzing evaluated models...")
        result = count_evaluated_models()

        if 'error' in result:
            logger.error(f"❌ Error: {result['error']}")
        else:
            logger.info(f"✓ {result['total_models']} models evaluated")
            logger.info(f"✓ {result['file_count']} files")
            logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
            logger.info(f"✓ {result['downloads']} downloads")

            if result['last_modified']:
                last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
                logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")

        return result

    except Exception as e:
        logger.error(f"Global error: {str(e)}")
        return {"error": str(e)}

if __name__ == "__main__":
    main()
backend/utils/fix_wrong_model_size.py
DELETED
@@ -1,110 +0,0 @@
import os
import json
import pytz
import logging
import asyncio
from datetime import datetime
from pathlib import Path
import huggingface_hub
from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
from dotenv import load_dotenv
from git import Repo
from tqdm.auto import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

from app.config.hf_config import HF_TOKEN, API

from app.utils.model_validation import ModelValidator

huggingface_hub.logging.set_verbosity_error()
huggingface_hub.utils.disable_progress_bars()

logging.basicConfig(
    level=logging.ERROR,
    format='%(message)s'
)
logger = logging.getLogger(__name__)
load_dotenv()

validator = ModelValidator()

def get_changed_files(repo_path, start_date, end_date):
    repo = Repo(repo_path)
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')

    changed_files = set()
    pbar = tqdm(repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}")
    for commit in pbar:
        commit_date = datetime.fromtimestamp(commit.committed_date)
        pbar.set_postfix_str(f"Commit date: {commit_date}")
        if start <= commit_date <= end:
            changed_files.update(item.a_path for item in commit.diff(commit.parents[0]))

        if commit_date < start:
            break

    return changed_files


def read_json(repo_path, file):
    with open(f"{repo_path}/{file}") as file:
        return json.load(file)


def write_json(repo_path, file, content):
    with open(f"{repo_path}/{file}", "w") as file:
        json.dump(content, file, indent=2)


def main():
    requests_path = "/requests"
    start_date = "2024-12-09"
    end_date = "2025-01-07"

    changed_files = get_changed_files(requests_path, start_date, end_date)

    for file in tqdm(changed_files):
        try:
            request_data = read_json(requests_path, file)
        except FileNotFoundError:
            tqdm.write(f"File {file} not found")
            continue

        try:
            model_info = API.model_info(
                repo_id=request_data["model"],
                revision=request_data["revision"],
                token=HF_TOKEN
            )
        except (RepositoryNotFoundError, RevisionNotFoundError):
            tqdm.write(f"Model info for {request_data['model']} not found")
            continue

        with logging_redirect_tqdm():
            new_model_size, error = asyncio.run(validator.get_model_size(
                model_info=model_info,
                precision=request_data["precision"],
                base_model=request_data["base_model"],
                revision=request_data["revision"]
            ))

        if error:
            tqdm.write(f"Error getting model size info for {request_data['model']}, {error}")
            continue

        old_model_size = request_data["params"]
        if old_model_size != new_model_size:
            if new_model_size > 100:
                tqdm.write(f"Model: {request_data['model']}, size is more than 100B: {new_model_size}")

            tqdm.write(f"Model: {request_data['model']}, old size: {request_data['params']} new size: {new_model_size}")
            tqdm.write(f"Updating request file {file}")

            request_data["params"] = new_model_size
            write_json(requests_path, file, content=request_data)


if __name__ == "__main__":
    main()
backend/utils/last_activity.py
DELETED
@@ -1,164 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import json
|
3 |
-
import logging
|
4 |
-
from datetime import datetime
|
5 |
-
from pathlib import Path
|
6 |
-
from typing import Dict, Any, List, Tuple
|
7 |
-
from huggingface_hub import HfApi
|
8 |
-
from dotenv import load_dotenv
|
9 |
-
|
10 |
-
# Get the backend directory path
|
11 |
-
BACKEND_DIR = Path(__file__).parent.parent
|
12 |
-
ROOT_DIR = BACKEND_DIR.parent
|
13 |
-
|
14 |
-
# Load environment variables from .env file in root directory
|
15 |
-
load_dotenv(ROOT_DIR / ".env")
|
16 |
-
|
17 |
-
# Configure logging
|
18 |
-
logging.basicConfig(
|
19 |
-
level=logging.INFO,
|
20 |
-
format='%(message)s'
|
21 |
-
)
|
22 |
-
logger = logging.getLogger(__name__)

# Initialize Hugging Face API
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
api = HfApi(token=HF_TOKEN)

# Default organization
HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'open-llm-leaderboard')

def get_last_votes(limit: int = 5) -> List[Dict]:
    """Get the last votes from the votes dataset"""
    try:
        logger.info("\nFetching last votes...")

        # Download and read votes file
        logger.info("Downloading votes file...")
        votes_file = api.hf_hub_download(
            repo_id=f"{HF_ORGANIZATION}/votes",
            filename="votes_data.jsonl",
            repo_type="dataset"
        )

        logger.info("Reading votes file...")
        votes = []
        with open(votes_file, 'r') as f:
            for line in f:
                try:
                    vote = json.loads(line)
                    votes.append(vote)
                except json.JSONDecodeError:
                    continue

        # Sort by timestamp and get last n votes
        logger.info("Sorting votes...")
        votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
        last_votes = votes[:limit]

        logger.info(f"✓ Found {len(last_votes)} recent votes")
        return last_votes

    except Exception as e:
        logger.error(f"Error reading votes: {str(e)}")
        return []

def get_last_models(limit: int = 5) -> List[Dict]:
    """Get the last models from the requests dataset using commit history"""
    try:
        logger.info("\nFetching last model submissions...")

        # Get commit history
        logger.info("Getting commit history...")
        commits = list(api.list_repo_commits(
            repo_id=f"{HF_ORGANIZATION}/requests",
            repo_type="dataset"
        ))
        logger.info(f"Found {len(commits)} commits")

        # Track processed files to avoid duplicates
        processed_files = set()
        models = []

        # Process commits until we have enough models
        for i, commit in enumerate(commits):
            logger.info(f"Processing commit {i+1}/{len(commits)} ({commit.created_at})")

            # Look at added/modified files in this commit
            files_to_process = [f for f in (commit.added + commit.modified) if f.endswith('.json')]
            if files_to_process:
                logger.info(f"Found {len(files_to_process)} JSON files in commit")

            for file in files_to_process:
                if file in processed_files:
                    continue

                processed_files.add(file)
                logger.info(f"Downloading {file}...")

                try:
                    # Download and read the file
                    content = api.hf_hub_download(
                        repo_id=f"{HF_ORGANIZATION}/requests",
                        filename=file,
                        repo_type="dataset"
                    )

                    with open(content, 'r') as f:
                        model_data = json.load(f)
                        models.append(model_data)
                        logger.info(f"✓ Added model {model_data.get('model', 'Unknown')}")

                    if len(models) >= limit:
                        logger.info("Reached desired number of models")
                        break

                except Exception as e:
                    logger.error(f"Error reading file {file}: {str(e)}")
                    continue

            if len(models) >= limit:
                break

        logger.info(f"✓ Found {len(models)} recent model submissions")
        return models

    except Exception as e:
        logger.error(f"Error reading models: {str(e)}")
        return []

def main():
    """Display last activities from the leaderboard"""
    try:
        # Get last votes
        logger.info("\n=== Last Votes ===")
        last_votes = get_last_votes()
        if last_votes:
            for vote in last_votes:
                logger.info(f"\nModel: {vote.get('model')}")
                logger.info(f"User: {vote.get('username')}")
                logger.info(f"Timestamp: {vote.get('timestamp')}")
        else:
            logger.info("No votes found")

        # Get last model submissions
        logger.info("\n=== Last Model Submissions ===")
        last_models = get_last_models()
        if last_models:
            for model in last_models:
                logger.info(f"\nModel: {model.get('model')}")
                logger.info(f"Submitter: {model.get('sender', 'Unknown')}")
                logger.info(f"Status: {model.get('status', 'Unknown')}")
                logger.info(f"Submission Time: {model.get('submitted_time', 'Unknown')}")
                logger.info(f"Precision: {model.get('precision', 'Unknown')}")
                logger.info(f"Weight Type: {model.get('weight_type', 'Unknown')}")
        else:
            logger.info("No models found")

    except Exception as e:
        logger.error(f"Global error: {str(e)}")

if __name__ == "__main__":
    main()
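
For context, the two helpers above can also be driven directly rather than through main(). A minimal usage sketch, assuming the script is importable as backend.utils.last_activity and that HF_TOKEN is set in the environment (the import path is an assumption, not something the diff guarantees):

    from backend.utils.last_activity import get_last_votes, get_last_models

    # Print the three most recent votes and model submissions.
    for vote in get_last_votes(limit=3):
        print(vote.get("model"), vote.get("username"), vote.get("timestamp"))

    for request in get_last_models(limit=3):
        print(request.get("model"), request.get("status"))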
backend/utils/sync_datasets_locally.py
DELETED
@@ -1,130 +0,0 @@
import os
import shutil
import tempfile
import logging
from pathlib import Path
from huggingface_hub import HfApi, snapshot_download, upload_folder, create_repo
from dotenv import load_dotenv

# Configure source and destination usernames
SOURCE_USERNAME = "open-llm-leaderboard"
DESTINATION_USERNAME = "tfrere"

# Get the backend directory path
BACKEND_DIR = Path(__file__).parent.parent
ROOT_DIR = BACKEND_DIR.parent

# Load environment variables from .env file in root directory
load_dotenv(ROOT_DIR / ".env")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s'
)
logger = logging.getLogger(__name__)

# List of dataset names to sync
DATASET_NAMES = [
    "votes",
    "results",
    "requests",
    "contents",
    "official-providers",
]

# Build list of datasets with their source and destination paths
DATASETS = [
    (name, f"{SOURCE_USERNAME}/{name}", f"{DESTINATION_USERNAME}/{name}")
    for name in DATASET_NAMES
]

# Initialize Hugging Face API
api = HfApi()

def ensure_repo_exists(repo_id, token):
    """Ensure the repository exists, create it if it doesn't"""
    try:
        api.repo_info(repo_id=repo_id, repo_type="dataset")
        logger.info(f"✓ Repository {repo_id} already exists")
    except Exception:
        logger.info(f"Creating repository {repo_id}...")
        create_repo(
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            private=True
        )
        logger.info(f"✓ Repository {repo_id} created")

def process_dataset(dataset_info, token):
    """Process a single dataset"""
    name, source_dataset, destination_dataset = dataset_info
    try:
        logger.info(f"\n📥 Processing dataset: {name}")

        # Ensure destination repository exists
        ensure_repo_exists(destination_dataset, token)

        # Create a temporary directory for this dataset
        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                # List files in source dataset
                logger.info(f"Listing files in {source_dataset}...")
                files = api.list_repo_files(source_dataset, repo_type="dataset")
                logger.info(f"Detected structure: {len(files)} files")

                # Download dataset
                logger.info(f"Downloading from {source_dataset}...")
                local_dir = snapshot_download(
                    repo_id=source_dataset,
                    repo_type="dataset",
                    local_dir=temp_dir,
                    token=token
                )
                logger.info("✓ Download complete")

                # Upload to destination while preserving structure
                logger.info(f"📤 Uploading to {destination_dataset}...")
                api.upload_folder(
                    folder_path=local_dir,
                    repo_id=destination_dataset,
                    repo_type="dataset",
                    token=token
                )
                logger.info(f"✅ {name} copied successfully!")
                return True

            except Exception as e:
                logger.error(f"❌ Error processing {name}: {str(e)}")
                return False

    except Exception as e:
        logger.error(f"❌ Error for {name}: {str(e)}")
        return False

def copy_datasets():
    try:
        logger.info("🔑 Checking authentication...")
        # Get token from .env file
        token = os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not found in .env file")

        # Process datasets sequentially
        results = []
        for dataset_info in DATASETS:
            success = process_dataset(dataset_info, token)
            results.append((dataset_info[0], success))

        # Print final summary
        logger.info("\n📊 Final summary:")
        for dataset, success in results:
            status = "✅ Success" if success else "❌ Failure"
            logger.info(f"{dataset}: {status}")

    except Exception as e:
        logger.error(f"❌ Global error: {str(e)}")

if __name__ == "__main__":
    copy_datasets()
backend/uv.lock
DELETED
The diff for this file is too large to render.
See raw diff
docker-compose.yml
DELETED
@@ -1,33 +0,0 @@
services:
  backend:
    build:
      context: ./backend
      dockerfile: Dockerfile.dev
      args:
        - HF_TOKEN=${HF_TOKEN}
    ports:
      - "${BACKEND_PORT:-8000}:8000"
    volumes:
      - ./backend:/app
    environment:
      - ENVIRONMENT=${ENVIRONMENT:-development}
      - HF_TOKEN=${HF_TOKEN}
      - HF_HOME=${HF_HOME:-/.cache}
    command: uvicorn app.asgi:app --host 0.0.0.0 --port 8000 --reload

  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile.dev
    ports:
      - "${FRONTEND_PORT:-7860}:7860"
    volumes:
      - ./frontend:/app
      - /app/node_modules
    environment:
      - NODE_ENV=${ENVIRONMENT:-development}
      - CHOKIDAR_USEPOLLING=true
      - PORT=${FRONTEND_PORT:-7860}
    command: npm start
    stdin_open: true
    tty: true
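
The compose file injects three variables into the backend container (ENVIRONMENT, HF_TOKEN, HF_HOME), using ${VAR:-default} fallbacks where a default makes sense. A minimal sketch of how a service could read them with the same fallback semantics; the reading code is an assumption, not taken from the backend:

    import os

    # Python equivalents of the compose-style "${VAR:-default}" fallbacks.
    environment = os.getenv("ENVIRONMENT", "development")
    hf_home = os.getenv("HF_HOME", "/.cache")   # huggingface_hub cache location
    hf_token = os.getenv("HF_TOKEN")            # secret: no safe default

    if not hf_token:
        raise RuntimeError("HF_TOKEN must be provided to the container")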
frontend/Dockerfile.dev
DELETED
@@ -1,15 +0,0 @@
FROM node:18

WORKDIR /app

# Install required global dependencies
RUN npm install -g react-scripts

# Copy package.json and package-lock.json
COPY package*.json ./

# Install project dependencies
RUN npm install

# Volume will be mounted here, no need for COPY
CMD ["npm", "start"]
frontend/README.md
DELETED
@@ -1,80 +0,0 @@
# Frontend - Open LLM Leaderboard 🏆

React interface for exploring and comparing open-source language models.

## 🏗 Architecture

```mermaid
flowchart TD
    Client(["User Browser"]) --> Components["React Components"]

    subgraph Frontend
        Components --> Context["Context Layer<br>• LeaderboardContext<br>• Global State"]

        API["API Layer<br>• /api/leaderboard/formatted<br>• TanStack Query"] --> |Data Feed| Context

        Context --> Hooks["Hooks Layer<br>• Data Processing<br>• Filtering<br>• Caching"]

        Hooks --> Features["Features<br>• Table Management<br>• Search & Filters<br>• Display Options"]
        Features --> Cache["Cache Layer<br>• LocalStorage<br>• URL State"]
    end

    API --> Backend["Backend Server"]

    style Backend fill:#f96,stroke:#333,stroke-width:2px
```

## ✨ Core Features

- 🔍 **Search & Filters**: Real-time filtering, regex search, advanced filters
- 📊 **Data Visualization**: Interactive table, customizable columns, sorting
- 🔄 **State Management**: URL sync, client-side caching (5min TTL)
- 📱 **Responsive Design**: Mobile-friendly, dark/light themes

## 🛠 Tech Stack

- React 18 + Material-UI
- TanStack Query & Table
- React Router v6

## 📁 Project Structure

```
src/
├── pages/
│   └── LeaderboardPage/
│       ├── components/    # UI Components
│       ├── context/       # Global State
│       └── hooks/         # Data Processing
├── components/            # Shared Components
└── utils/                 # Helper Functions
```

## 🚀 Development

```bash
# Install dependencies
npm install

# Start development server
npm start

# Production build
npm run build
```

## 🔧 Environment Variables

```env
# API Configuration
REACT_APP_API_URL=http://localhost:8000
REACT_APP_CACHE_DURATION=300000  # 5 minutes
```

## 🔄 Data Flow

1. API fetches leaderboard data from backend
2. Context stores and manages global state
3. Hooks handle data processing and filtering
4. Components render based on processed data
5. Cache maintains user preferences and URL state
frontend/package.json
DELETED
@@ -1,55 +0,0 @@
{
  "name": "open-llm-leaderboard",
  "version": "0.1.0",
  "private": true,
  "dependencies": {
    "@emotion/react": "^11.13.3",
    "@emotion/styled": "^11.13.0",
    "@huggingface/hub": "^0.14.0",
    "@mui/icons-material": "^6.1.7",
    "@mui/lab": "^6.0.0-beta.16",
    "@mui/material": "^6.1.6",
    "@mui/x-data-grid": "^7.22.2",
    "@tanstack/react-query": "^5.62.2",
    "@tanstack/react-table": "^8.20.5",
    "@tanstack/react-virtual": "^3.10.9",
    "@testing-library/jest-dom": "^5.17.0",
    "@testing-library/react": "^13.4.0",
    "@testing-library/user-event": "^13.5.0",
    "compression": "^1.7.4",
    "cors": "^2.8.5",
    "express": "^4.18.2",
    "react": "^18.3.1",
    "react-dom": "^18.3.1",
    "react-router-dom": "^6.28.0",
    "react-scripts": "5.0.1",
    "serve-static": "^1.15.0",
    "web-vitals": "^2.1.4"
  },
  "scripts": {
    "start": "react-scripts start",
    "build": "react-scripts build",
    "test": "react-scripts test",
    "eject": "react-scripts eject",
    "serve": "node server.js"
  },
  "eslintConfig": {
    "extends": [
      "react-app",
      "react-app/jest"
    ]
  },
  "browserslist": {
    "production": [
      ">0.2%",
      "not dead",
      "not op_mini all"
    ],
    "development": [
      "last 1 chrome version",
      "last 1 firefox version",
      "last 1 safari version"
    ]
  },
  "proxy": "http://backend:8000"
}
frontend/public/index.html
DELETED
@@ -1,96 +0,0 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="icon" href="%PUBLIC_URL%/logo32.png" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, viewport-fit=cover"
    />
    <meta
      name="description"
      content="Interactive leaderboard tracking and comparing open-source Large Language Models across multiple benchmarks: IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
    />

    <!-- Open Graph / Facebook -->
    <meta property="og:type" content="website" />
    <meta
      property="og:url"
      content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
    />
    <meta
      property="og:title"
      content="Open LLM Leaderboard - Compare Open Source Large Language Models"
    />
    <meta
      property="og:description"
      content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
    />
    <meta property="og:image" content="%PUBLIC_URL%/og-image.png" />

    <!-- Twitter -->
    <meta property="twitter:card" content="summary_large_image" />
    <meta
      property="twitter:url"
      content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
    />
    <meta
      property="twitter:title"
      content="Open LLM Leaderboard - Compare Open Source Large Language Models"
    />
    <meta
      property="twitter:description"
      content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
    />
    <meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
    <!--
      Notice the use of %PUBLIC_URL% in the tags above.
      It will be replaced with the URL of the `public` folder during the build.
      Only files inside the `public` folder can be referenced from the HTML.

      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
      work correctly both with client-side routing and a non-root public URL.
      Learn how to configure a non-root public URL by running `npm run build`.
    -->
    <title>
      Open LLM Leaderboard - Compare Open Source Large Language Models
    </title>
    <link
      href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
      rel="stylesheet"
    />
    <style>
      html,
      body {
        position: fixed;
        width: 100%;
        height: 100%;
        overflow: hidden;
        -webkit-overflow-scrolling: touch;
      }
      #root {
        position: absolute;
        top: 0;
        left: 0;
        right: 0;
        bottom: 0;
        overflow-y: auto;
        -webkit-overflow-scrolling: touch;
      }
    </style>
  </head>
  <body>
    <noscript>You need to enable JavaScript to run this app.</noscript>
    <div id="root"></div>
    <!--
      This HTML file is a template.
      If you open it directly in the browser, you will see an empty page.

      You can add webfonts, meta tags, or analytics to this file.
      The build step will place the bundled scripts into the <body> tag.

      To begin the development, run `npm start` or `yarn start`.
      To create a production bundle, use `npm run build` or `yarn build`.
    -->
  </body>
</html>
frontend/public/logo256.png
DELETED
Binary file (24.6 kB)
frontend/public/logo32.png
DELETED
Binary file (1.96 kB)
frontend/public/og-image.jpg
DELETED
Binary file (13.8 kB)