Spaces:
Runtime error
Runtime error
File size: 2,813 Bytes
a3f0b6e 7c37fc5 3fa4d71 53f077b a3f0b6e 7c37fc5 c6e88ba 7c37fc5 beb5662 36ed17a 7c37fc5 035df3d 1b0bd15 7c37fc5 240148f 7c37fc5 53f077b 7c37fc5 240148f 7c37fc5 36ed17a 7c37fc5 beb5662 035df3d 1b0bd15 7c37fc5 1b0bd15 53f077b 1b0bd15 53f077b 1b0bd15 53f077b 1b0bd15 035df3d 1b0bd15 24e4bf5 1b0bd15 240148f 035df3d 1b0bd15 a3f0b6e 7c37fc5 a3f0b6e 7c37fc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# Compose stack for the catdog experiment: train -> eval / server -> client.
#
# All four services build their image from the repository root, bind-mount the
# same four host directories under /app, load variables from .env and reserve
# one NVIDIA GPU.
#
# Start-up ordering is coordinated at runtime rather than with `depends_on`:
#   * `train` writes /app/checkpoints/train_done.flag when it has finished;
#   * `eval` and `server` poll for that flag before doing any work;
#   * `client` polls the server's /health endpoint before running.
services:
  # Trains the model, builds the artifacts, then publishes the completion flag.
  train:
    build:
      context: .
    # Compose does not pass `command` through a shell, so the `&&` chain has to
    # run under an explicit `sh -c` (as the other services already do).
    command:
      - sh
      - "-c"
      - |
        rm -f /app/checkpoints/train_done.flag &&
        python -m src.train_optuna_callbacks experiment=catdog_experiment ++task_name=train ++train=True ++test=False &&
        python -m src.create_artifacts &&
        touch /app/checkpoints/train_done.flag
    # The flag lives in a host bind mount and therefore survives between runs;
    # it is removed first so waiters do not start on a stale flag once training
    # has begun. The absolute path matches the one `eval` and `server` poll.
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Waits for the training flag, then runs the same entry point in test mode.
  eval:
    build:
      context: .
    command:
      - sh
      - "-c"
      - |
        while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done &&
        python -m src.train_optuna_callbacks experiment=catdog_experiment ++task_name=test ++train=False ++test=True
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Waits for the training flag, then starts src.server; container port 8080 is
  # published on host port 8080.
  server:
    build:
      context: .
    command:
      - sh
      - "-c"
      - |
        while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done &&
        python -m src.server
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
      - SERVER_URL=http://localhost:8080
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    ports:
      - "8080:8080"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Polls the server over the compose network (service name `server`) every
  # five seconds, then runs ./run_client.sh.
  client:
    build:
      context: .
    # NOTE(review): `curl -s` exits 0 for any HTTP response, including error
    # statuses; consider `-f` if /health can answer non-2xx while the server is
    # still starting - confirm against src.server before changing.
    command:
      - sh
      - "-c"
      - |
        until curl -s http://server:8080/health; do
          echo "Waiting for server to be ready..."
          sleep 5
        done &&
        ./run_client.sh
    volumes:
      - ./data:/app/data
      - ./checkpoints:/app/checkpoints
      - ./artifacts:/app/artifacts
      - ./logs:/app/logs
    environment:
      - PYTHONUNBUFFERED=1
      - PYTHONPATH=/app
      - SERVER_URL=http://server:8080
    shm_size: '4g'
    networks:
      - default
    env_file:
      - .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

# Named volumes. None of the services above reference them (they all mount
# ./host-path bind mounts instead); kept so the declared set is unchanged.
volumes:
  data: {}
  checkpoints: {}
  artifacts: {}
  logs: {}

networks:
  default: {}
|