JeffYang52415
committed on
feat: docker compose
Browse files- .dockerignore +20 -0
- .github/workflows/ci.yml +34 -5
- CHANGELOG.md +16 -0
- Makefile +66 -9
- app.py +0 -6
- docker-compose.yml +34 -0
- nginx.conf +58 -0
.dockerignore
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.git
|
2 |
+
.gitignore
|
3 |
+
.env
|
4 |
+
.venv
|
5 |
+
__pycache__
|
6 |
+
*.pyc
|
7 |
+
*.pyo
|
8 |
+
*.pyd
|
9 |
+
.Python
|
10 |
+
*.py[cod]
|
11 |
+
*$py.class
|
12 |
+
.pytest_cache
|
13 |
+
.coverage
|
14 |
+
htmlcov
|
15 |
+
.mypy_cache
|
16 |
+
.ruff_cache
|
17 |
+
.DS_Store
|
18 |
+
notebooks/
|
19 |
+
tests/
|
20 |
+
docs/
|
.github/workflows/ci.yml
CHANGED
@@ -9,13 +9,18 @@ on:
|
|
9 |
jobs:
|
10 |
test:
|
11 |
runs-on: ubuntu-latest
|
|
|
|
|
|
|
|
|
|
|
12 |
steps:
|
13 |
- uses: actions/checkout@v4
|
14 |
|
15 |
-
- name: Set up Python
|
16 |
uses: actions/setup-python@v5
|
17 |
with:
|
18 |
-
python-version:
|
19 |
cache: "pip"
|
20 |
|
21 |
- name: Install Poetry
|
@@ -27,9 +32,14 @@ jobs:
|
|
27 |
poetry config virtualenvs.create true
|
28 |
poetry config virtualenvs.in-project true
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
- name: Install dependencies
|
31 |
run: |
|
32 |
-
poetry lock --no-update
|
33 |
poetry install
|
34 |
|
35 |
- name: Run pre-commit hooks
|
@@ -39,5 +49,24 @@ jobs:
|
|
39 |
env:
|
40 |
PRE_COMMIT_CACHE_KEY: ${{ hashFiles('.pre-commit-config.yaml', 'pyproject.toml') }}
|
41 |
|
42 |
-
- name: Run tests
|
43 |
-
run:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
jobs:
|
10 |
test:
|
11 |
runs-on: ubuntu-latest
|
12 |
+
strategy:
|
13 |
+
matrix:
|
14 |
+
python-version: ["3.10", "3.11", "3.12"]
|
15 |
+
fail-fast: false
|
16 |
+
|
17 |
steps:
|
18 |
- uses: actions/checkout@v4
|
19 |
|
20 |
+
- name: Set up Python ${{ matrix.python-version }}
|
21 |
uses: actions/setup-python@v5
|
22 |
with:
|
23 |
+
python-version: ${{ matrix.python-version }}
|
24 |
cache: "pip"
|
25 |
|
26 |
- name: Install Poetry
|
|
|
32 |
poetry config virtualenvs.create true
|
33 |
poetry config virtualenvs.in-project true
|
34 |
|
35 |
+
- name: Cache Poetry virtualenv
|
36 |
+
uses: actions/cache@v3
|
37 |
+
with:
|
38 |
+
path: ./.venv
|
39 |
+
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
|
40 |
+
|
41 |
- name: Install dependencies
|
42 |
run: |
|
|
|
43 |
poetry install
|
44 |
|
45 |
- name: Run pre-commit hooks
|
|
|
49 |
env:
|
50 |
PRE_COMMIT_CACHE_KEY: ${{ hashFiles('.pre-commit-config.yaml', 'pyproject.toml') }}
|
51 |
|
52 |
+
- name: Run tests with coverage
|
53 |
+
run: |
|
54 |
+
poetry run pytest --cov=llmdataparser --cov-report=xml
|
55 |
+
|
56 |
+
- name: Upload coverage to Codecov
|
57 |
+
uses: codecov/codecov-action@v3
|
58 |
+
with:
|
59 |
+
file: ./coverage.xml
|
60 |
+
fail_ci_if_error: true
|
61 |
+
|
62 |
+
- name: Build documentation
|
63 |
+
run: |
|
64 |
+
poetry run mkdocs build
|
65 |
+
if: matrix.python-version == '3.12'
|
66 |
+
|
67 |
+
- name: Upload documentation artifact
|
68 |
+
uses: actions/upload-artifact@v4  # v3 of this action is retired and fails on GitHub-hosted runners
|
69 |
+
with:
|
70 |
+
name: documentation
|
71 |
+
path: site/
|
72 |
+
if: matrix.python-version == '3.12'
|
CHANGELOG.md
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Changelog
|
2 |
+
|
3 |
+
All notable changes to this project will be documented in this file.
|
4 |
+
|
5 |
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6 |
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7 |
+
|
8 |
+
## \[1.0.0\] - 2024-12-30
|
9 |
+
|
10 |
+
### Added
|
11 |
+
|
12 |
+
- Initial release
|
13 |
+
- Support for multiple benchmark datasets (MMLU, GSM8k, etc.)
|
14 |
+
- Gradio interface for dataset exploration
|
15 |
+
- Comprehensive test suite
|
16 |
+
- Documentation and examples
|
Makefile
CHANGED
@@ -1,8 +1,13 @@
|
|
|
|
1 |
# Variables
|
|
|
2 |
IMAGE_NAME = llmdataparser
|
3 |
CONTAINER_NAME = llmdataparser
|
4 |
VERSION = latest
|
5 |
|
|
|
|
|
|
|
6 |
# Build the Docker image
|
7 |
build:
|
8 |
docker build -t $(IMAGE_NAME):$(VERSION) .
|
@@ -23,28 +28,80 @@ rm:
|
|
23 |
rmi:
|
24 |
docker rmi $(IMAGE_NAME):$(VERSION)
|
25 |
|
26 |
-
#
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
up: build run
|
31 |
|
32 |
# Stop and remove container
|
33 |
down: stop rm
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
# Show container logs
|
36 |
logs:
|
37 |
docker logs $(CONTAINER_NAME)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Enter container shell
|
40 |
shell:
|
41 |
docker exec -it $(CONTAINER_NAME) /bin/bash
|
42 |
|
43 |
-
#
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
|
|
|
|
|
|
|
|
1 |
+
# -----------------------------
|
2 |
# Variables
|
3 |
+
# -----------------------------
|
4 |
IMAGE_NAME = llmdataparser
|
5 |
CONTAINER_NAME = llmdataparser
|
6 |
VERSION = latest
|
7 |
|
8 |
+
# -----------------------------
|
9 |
+
# Docker Basic Commands
|
10 |
+
# -----------------------------
|
11 |
# Build the Docker image
|
12 |
build:
|
13 |
docker build -t $(IMAGE_NAME):$(VERSION) .
|
|
|
28 |
rmi:
|
29 |
docker rmi $(IMAGE_NAME):$(VERSION)
|
30 |
|
31 |
+
# -----------------------------
|
32 |
+
# Docker Compose Commands
|
33 |
+
# -----------------------------
|
34 |
+
# Start with docker-compose (development)
|
35 |
+
compose-up:
|
36 |
+
docker compose up -d
|
37 |
+
|
38 |
+
# Stop and remove containers
|
39 |
+
compose-down:
|
40 |
+
docker compose down
|
41 |
+
|
42 |
+
# View logs
|
43 |
+
compose-logs:
|
44 |
+
docker compose logs -f
|
45 |
+
|
46 |
+
# Rebuild containers
|
47 |
+
compose-build:
|
48 |
+
docker compose build
|
49 |
|
50 |
+
# Restart containers
|
51 |
+
compose-restart:
|
52 |
+
docker compose restart
|
53 |
+
|
54 |
+
# -----------------------------
|
55 |
+
# Convenience Commands
|
56 |
+
# -----------------------------
|
57 |
+
# Build and run with docker
|
58 |
up: build run
|
59 |
|
60 |
# Stop and remove container
|
61 |
down: stop rm
|
62 |
|
63 |
+
# Clean everything
|
64 |
+
clean: stop rm rmi
|
65 |
+
|
66 |
+
# -----------------------------
|
67 |
+
# Monitoring Commands
|
68 |
+
# -----------------------------
|
69 |
# Show container logs
|
70 |
logs:
|
71 |
docker logs $(CONTAINER_NAME)
|
72 |
|
73 |
+
# Follow container logs
|
74 |
+
logs-follow:
|
75 |
+
docker logs -f $(CONTAINER_NAME)
|
76 |
+
|
77 |
+
# Show container status
|
78 |
+
status:
|
79 |
+
docker ps -a --filter "name=$(CONTAINER_NAME)"
|
80 |
+
|
81 |
# Enter container shell
|
82 |
shell:
|
83 |
docker exec -it $(CONTAINER_NAME) /bin/bash
|
84 |
|
85 |
+
# -----------------------------
|
86 |
+
# Production Commands
|
87 |
+
# -----------------------------
|
88 |
+
# Test nginx configuration (for production use)
|
89 |
+
nginx-test:
|
90 |
+
docker compose run --rm nginx nginx -t
|
91 |
|
92 |
+
# Start with nginx test (for production use)
|
93 |
+
compose-up-prod: nginx-test compose-up
|
94 |
+
|
95 |
+
# -----------------------------
|
96 |
+
# Security Commands
|
97 |
+
# -----------------------------
|
98 |
+
security-check:
|
99 |
+
@echo "Checking nginx configuration..."
|
100 |
+
docker compose run --rm nginx nginx -t
|
101 |
+
@echo "Checking exposed ports..."
|
102 |
+
docker compose config | grep -E "ports:|127.0.0.1"
|
103 |
|
104 |
+
# Ensure all targets are treated as commands, not files
|
105 |
+
.PHONY: build run stop rm rmi clean up down logs shell \
|
106 |
+
compose-up compose-down compose-logs compose-build compose-restart \
|
107 |
+
nginx-test status logs-follow compose-up-prod security-check
|
app.py
CHANGED
@@ -390,13 +390,7 @@ if __name__ == "__main__":
|
|
390 |
demo = create_interface()
|
391 |
try:
|
392 |
demo.launch(
|
393 |
-
server_port=7860,
|
394 |
-
auth=None,
|
395 |
-
ssl_keyfile=None,
|
396 |
-
ssl_certfile=None,
|
397 |
show_error=True, # Changed to True for debugging
|
398 |
-
share=False,
|
399 |
-
max_threads=40,
|
400 |
)
|
401 |
except Exception as e:
|
402 |
print(f"Error launching Gradio: {e}") # Add error logging
|
|
|
390 |
demo = create_interface()
|
391 |
try:
|
392 |
demo.launch(
|
|
|
|
|
|
|
|
|
393 |
show_error=True, # Changed to True for debugging
|
|
|
|
|
394 |
)
|
395 |
except Exception as e:
|
396 |
print(f"Error launching Gradio: {e}") # Add error logging
|
docker-compose.yml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: "3.8"
|
2 |
+
|
3 |
+
services:
|
4 |
+
llmdataparser:
|
5 |
+
build: .
|
6 |
+
environment:
|
7 |
+
- GRADIO_SERVER_PORT=7860
|
8 |
+
volumes:
|
9 |
+
- .:/app
|
10 |
+
- huggingface_cache:/app/.cache/huggingface
|
11 |
+
healthcheck:
|
12 |
+
test: ["CMD", "curl", "-f", "http://127.0.0.1:7860"]
|
13 |
+
interval: 30s
|
14 |
+
timeout: 10s
|
15 |
+
retries: 3
|
16 |
+
networks:
|
17 |
+
- internal
|
18 |
+
|
19 |
+
nginx:
|
20 |
+
image: nginx:alpine
|
21 |
+
ports:
|
22 |
+
- "80:80"
|
23 |
+
volumes:
|
24 |
+
- ./nginx.conf:/etc/nginx/nginx.conf:ro
|
25 |
+
depends_on:
|
26 |
+
- llmdataparser
|
27 |
+
networks:
|
28 |
+
- internal
|
29 |
+
|
30 |
+
networks:
|
31 |
+
internal:
|
32 |
+
|
33 |
+
volumes:
|
34 |
+
huggingface_cache:
|
nginx.conf
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
events {
|
2 |
+
worker_connections 1024;
|
3 |
+
}
|
4 |
+
|
5 |
+
http {
|
6 |
+
# Basic security settings
|
7 |
+
server_tokens off; # Don't show nginx version
|
8 |
+
client_max_body_size 10M; # Limit request size
|
9 |
+
client_body_timeout 12;
|
10 |
+
client_header_timeout 12;
|
11 |
+
|
12 |
+
upstream gradio_app {
|
13 |
+
server llmdataparser:7860;
|
14 |
+
keepalive 32;
|
15 |
+
}
|
16 |
+
|
17 |
+
server {
|
18 |
+
listen 80;
|
19 |
+
server_name localhost;
|
20 |
+
|
21 |
+
# Enhanced security headers
|
22 |
+
add_header X-Frame-Options "SAMEORIGIN" always;
|
23 |
+
add_header X-Content-Type-Options "nosniff" always;
|
24 |
+
add_header X-XSS-Protection "1; mode=block" always;
|
25 |
+
add_header Referrer-Policy "strict-origin-when-cross-origin" always; # must be a valid policy token, otherwise browsers ignore the header
|
26 |
+
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline';" always;
|
27 |
+
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
|
28 |
+
|
29 |
+
location / {
|
30 |
+
proxy_pass http://gradio_app;
|
31 |
+
proxy_set_header Host $host;
|
32 |
+
proxy_set_header X-Real-IP $remote_addr;
|
33 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
34 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
35 |
+
|
36 |
+
# WebSocket support
|
37 |
+
proxy_http_version 1.1;
|
38 |
+
proxy_set_header Upgrade $http_upgrade;
|
39 |
+
proxy_set_header Connection "upgrade";
|
40 |
+
|
41 |
+
# Timeouts
|
42 |
+
proxy_connect_timeout 60s;
|
43 |
+
proxy_send_timeout 60s;
|
44 |
+
proxy_read_timeout 60s;
|
45 |
+
|
46 |
+
# Security
|
47 |
+
proxy_buffering on;
|
48 |
+
proxy_buffer_size 8k;
|
49 |
+
proxy_buffers 8 8k;
|
50 |
+
}
|
51 |
+
|
52 |
+
# Deny access to hidden files
|
53 |
+
location ~ /\. {
|
54 |
+
deny all;
|
55 |
+
return 404;
|
56 |
+
}
|
57 |
+
}
|
58 |
+
}
|