#!/bin/bash
#
# Starts text-generation-launcher on port 8080 in the background, polls its
# /health endpoint until it answers (or retries run out), then starts
# app/main.py in the background. The script exits as soon as either
# background process terminates, propagating that process's exit status.
#
# Required environment variables:
#   MODEL_NAME               value for --model-id
#   QUANTIZATION             "false" to omit --quantize; any other value is
#                            passed through as the --quantize argument
#   MAX_CONCURRENT_REQUESTS  value for --max-concurrent-requests
#   MAX_INPUT_LENGTH         value for --max-input-length
#   MAX_TOTAL_TOKENS         value for --max-total-tokens
#   GRADIO_PORT              value for app/main.py --port

# Fail fast with a clear message instead of handing the launcher an option
# that is missing its value.
: "${MODEL_NAME:?MODEL_NAME must be set}"
: "${QUANTIZATION:?QUANTIZATION must be set (use \"false\" to disable)}"
: "${MAX_CONCURRENT_REQUESTS:?MAX_CONCURRENT_REQUESTS must be set}"
: "${MAX_INPUT_LENGTH:?MAX_INPUT_LENGTH must be set}"
: "${MAX_TOTAL_TOKENS:?MAX_TOTAL_TOKENS must be set}"
: "${GRADIO_PORT:?GRADIO_PORT must be set}"

# Build the argument list once; an array keeps every value a single word even
# if it contains spaces or glob characters.
launcher_args=(
  --model-id "$MODEL_NAME"
  --num-shard 1
  --port 8080
  --trust-remote-code
  --max-concurrent-requests "$MAX_CONCURRENT_REQUESTS"
  --max-input-length "$MAX_INPUT_LENGTH"
  --max-total-tokens "$MAX_TOTAL_TOKENS"
)

# Only request quantization when a mode other than the literal "false" is given.
if [[ "$QUANTIZATION" != "false" ]]; then
  launcher_args+=(--quantize "$QUANTIZATION")
fi

text-generation-launcher "${launcher_args[@]}" &

# Block until the server answers on /health: up to 60 retries, 10 seconds
# apart, also retrying while the port still refuses connections. --fail makes
# an HTTP error response count as a failure in curl's exit status.
if ! curl --fail --retry 60 --retry-delay 10 --retry-connrefused \
  http://127.0.0.1:8080/health; then
  # Best effort, as before: keep going so a slow-starting server can still be
  # used once it is up, but make the situation visible in the logs.
  printf 'warning: %s\n' \
    'health check on http://127.0.0.1:8080/health did not succeed; starting app anyway' >&2
fi

python app/main.py --port "$GRADIO_PORT" &

# Return as soon as the first of the two background jobs exits, and hand its
# exit status back to the caller.
wait -n

exit $?