# Compose stack for the cat/dog ResNet experiment.
#
# Three services are built from the same build context (.):
#   train      - runs training, then artifact creation, then writes a
#                "done" flag into the shared checkpoints directory.
#   eval       - waits for the flag, then runs the test task.
#   inference  - waits for the flag, then runs src.infer.
#
# All three share the same bind mounts, so the flag written by `train` at
# /app/checkpoints/train_done.flag is what `eval` and `inference` poll for.
#
# Every command is given in exec-list form with an explicit `sh -c`, because
# Compose does not run `command` through a shell on its own; operators such
# as `&&` only work inside a shell.

services:
  train:
    build:
      context: .
    command:
      - sh
      - '-c'
      - |
        set -e
        # Drop a flag left over from a previous run so the waiters do not
        # start against stale checkpoints.
        # NOTE(review): a waiter that polls before this line runs can still
        # see the old flag; clear ./checkpoints/train_done.flag on the host
        # first if that matters.
        rm -f /app/checkpoints/train_done.flag
        python -m src.train_optuna_callbacks \
          experiment=catdog_experiment_resnet \
          ++task_name=train ++train=True ++test=False
        python -m src.create_artifacts
        # Absolute path: the same location eval/inference poll, regardless
        # of the image's working directory.
        touch /app/checkpoints/train_done.flag
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  eval:
    build:
      context: .
    command:
      - sh
      - '-c'
      - |
        set -e
        # Block until the train service has written its completion flag.
        while [ ! -f /app/checkpoints/train_done.flag ]; do
          sleep 10
        done
        python -m src.train_optuna_callbacks \
          experiment=catdog_experiment_resnet \
          ++task_name=test ++train=False ++test=True
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  inference:
    build:
      context: .
    command:
      - sh
      - '-c'
      - |
        set -e
        # Block until the train service has written its completion flag.
        while [ ! -f /app/checkpoints/train_done.flag ]; do
          sleep 10
        done
        python -m src.infer experiment=catdog_experiment_resnet
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

# NOTE(review): the services above mount host directories (./data, ...),
# not these named volumes, so these declarations look unused - confirm
# before removing.
volumes:
  data: {}
  checkpoints: {}
  artifacts: {}
  logs: {}

networks:
  default: {}