# Spaces:
# Runtime error
# Runtime error
# Compose stack with four services built from the same local image (context: .):
#   train  -> runs training, builds artifacts, then writes a "done" flag file
#   eval   -> waits for the flag file, then runs the test task
#   server -> waits for the flag file, then starts the server on port 8080
#   client -> waits for the server's /health endpoint, then runs run_client.sh
# All services share the same bind mounts, env file, shm size and GPU reservation.
services:
  train:
    build:
      context: .
    # Compose does not run `command` through a shell, so the `&&` chain must be
    # wrapped in `sh -c` (as the eval, server and client services already do).
    # The flag is written to the absolute path that eval and server poll;
    # /app/checkpoints is the bind mount of ./checkpoints declared below.
    command: |
      sh -c 'python -m src.train_optuna_callbacks experiment=catdog_experiment ++task_name=train ++train=True ++test=False && \
      python -m src.create_artifacts && \
      touch /app/checkpoints/train_done.flag'
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    # Reserve one NVIDIA GPU for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  eval:
    build:
      context: .
    # Poll every 10 s for the flag file written by the train service, then run
    # the same entry point with train disabled and test enabled.
    command: |
      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.train_optuna_callbacks experiment=catdog_experiment ++task_name=test ++train=False ++test=True'
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  server:
    build:
      context: .
    # Poll every 10 s for the flag file written by the train service, then
    # start the server module.
    command: |
      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.server'
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
      - SERVER_URL=http://localhost:8080
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    # Publish container port 8080 on host port 8080.
    ports:
      - "8080:8080"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  client:
    build:
      context: .
    # Retry the server's /health URL every 5 s until curl succeeds, then run
    # the client script. The server is addressed by its service name.
    command: |
      sh -c 'until curl -s http://server:8080/health; do echo "Waiting for server to be ready..."; sleep 5; done && \
      ./run_client.sh'
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
      - SERVER_URL=http://server:8080
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

# NOTE(review): every service above mounts host directories (./data, ./checkpoints,
# ./artifacts, ./logs) as bind mounts, so none of these named volumes is referenced
# by a service in this file. Kept as declared; confirm whether they are still needed.
volumes:
  data:
  checkpoints:
  artifacts:
  logs:

networks:
  default: