|
#!/usr/bin/env bash
#
# Supervisor script: launches the configured services, records their PIDs,
# and restarts them if they die.

# Absolute directory containing this script; every other path is derived
# from it so the script can be started from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" || {
  echo "Unable to determine the script directory." >&2
  exit 1
}

# PID file of the supervisor itself (used for single-instance detection).
PIDFILE="$SCRIPT_DIR/.run.pid"

# Directory receiving the stdout/stderr logs of every service.
LOG_DIR="$SCRIPT_DIR/logs"

# Number of restarts allowed per service before everything is shut down.
MAX_RESTARTS=3

mkdir -p "$LOG_DIR" || {
  echo "Unable to create log directory $LOG_DIR." >&2
  exit 1
}

# Prepend the script directory to PYTHONPATH. The ':' separator is only
# emitted when PYTHONPATH already has a value; otherwise the result would end
# with ':' and contain an empty path entry (commonly resolved as the current
# working directory).
export PYTHONPATH="$SCRIPT_DIR${PYTHONPATH:+:$PYTHONPATH}"
|
|
|
|
|
|
|
|
|
# Command line used to launch each service, keyed by service name.
# Each value is a plain string that the launcher splits on whitespace.
declare -A SERVICE_CMDS=()
SERVICE_CMDS[nginx]="nginx -p ${SCRIPT_DIR} -c ${SCRIPT_DIR}/nginx.conf"
SERVICE_CMDS[taproot_dispatcher]="taproot dispatcher --config ${SCRIPT_DIR}/dispatcher.yaml --add-import anachrovox --debug"
SERVICE_CMDS[taproot_overseer]="taproot overseer --config ${SCRIPT_DIR}/overseer.yaml --debug"
|
|
|
# Log destinations per service, keyed by service name:
#   SERVICE_LOGS_STDOUT -> ${LOG_DIR}/<service>.log
#   SERVICE_LOGS_STDERR -> ${LOG_DIR}/<service>_err.log
declare -A SERVICE_LOGS_STDOUT=() SERVICE_LOGS_STDERR=()
for _log_svc in nginx taproot_dispatcher taproot_overseer; do
  SERVICE_LOGS_STDOUT[$_log_svc]="${LOG_DIR}/${_log_svc}.log"
  SERVICE_LOGS_STDERR[$_log_svc]="${LOG_DIR}/${_log_svc}_err.log"
done
unset _log_svc
|
|
|
# File in which the PID of each launched service is recorded, keyed by
# service name.
declare -A SERVICE_PIDFILES=()
SERVICE_PIDFILES[nginx]="${SCRIPT_DIR}/.nginx.pid"
SERVICE_PIDFILES[taproot_dispatcher]="${SCRIPT_DIR}/.dispatcher.pid"
SERVICE_PIDFILES[taproot_overseer]="${SCRIPT_DIR}/.overseer.pid"
|
|
|
|
|
# Runtime state, keyed by service name:
#   SERVICE_PIDS          - PID of the most recently launched process
#   SERVICE_RESTART_COUNT - number of restarts performed so far
declare -A SERVICE_PIDS SERVICE_RESTART_COUNT
|
|
|
|
|
# Wall-clock time (seconds.nanoseconds since the epoch) at script start;
# timestamp_echo reports elapsed time relative to this value.
START_TIME=$(date +%s.%N)

#######################################
# Print a message prefixed with the time elapsed since START_TIME.
# Output format: "[+HH:MM:SS.FFFF] message", where FFFF is the fractional
# second truncated to four digits (units of 0.1 ms).
# Globals:   START_TIME (read)
# Arguments: the message words; they are joined with spaces
# Outputs:   one line on stdout
#######################################
timestamp_echo() {
  local now stamp
  now=$(date +%s.%N)

  # All arithmetic happens inside one awk process. Passing the elapsed value
  # back through awk's `print` would format it with OFMT (%.6g) and drop
  # fractional digits once the elapsed time exceeds ~100 seconds.
  stamp=$(awk -v now="$now" -v start="$START_TIME" 'BEGIN {
    elapsed = now - start
    printf "%02d:%02d:%02d.%04d",
      int(elapsed / 3600),
      int((elapsed % 3600) / 60),
      int(elapsed % 60),
      int((elapsed - int(elapsed)) * 10000)
  }')

  printf '[+%s] %s\n' "$stamp" "$*"
}
|
|
|
# Scalar flag: empty while running, set to 1 by cleanup once shutdown has
# begun. It is only ever read and written as a plain string, so it is
# declared as one rather than as an associative array.
SHUTTING_DOWN=""
|
|
|
|
|
|
|
|
|
|
|
# Single-instance guard. If a PID file from an earlier run exists, either
# refuse to start (that process is still alive) or treat the file as stale,
# stop any services recorded by that run, and continue.
if [[ -f "$PIDFILE" ]]; then
  PID=""
  read -r PID < "$PIDFILE"

  # Only a positive integer is a usable PID. An empty or corrupt file is
  # treated as stale, and a literal 0 must never reach kill (it would address
  # this script's own process group and always "succeed").
  if [[ "$PID" =~ ^[1-9][0-9]*$ ]] && kill -0 "$PID" 2>/dev/null; then
    echo "Script is already running with PID $PID. Exiting."
    exit 1
  fi

  echo "Stale PID file detected. Removing and continuing."
  rm -f "$PIDFILE"

  # Stop services left behind by the dead supervisor.
  for svc in "${!SERVICE_PIDFILES[@]}"; do
    pidfile="${SERVICE_PIDFILES[$svc]}"
    [[ -f "$pidfile" ]] || continue

    pid=""
    read -r pid < "$pidfile"
    if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
      echo "Stopping $svc (PID $pid) from zombie process."
      kill "$pid"
    fi

    # Drop the stale record so a later run cannot signal an unrelated
    # process that has since been assigned the same PID.
    rm -f -- "$pidfile"
  done
fi

# Record this instance's PID for the next invocation's guard.
echo $$ > "$PIDFILE"
|
|
|
|
|
|
|
|
|
#######################################
# Stop every tracked service, remove PID files, and exit the script.
# Each live service receives SIGTERM; anything still alive after a grace
# period of about one second receives SIGKILL.
# Globals:   SHUTTING_DOWN (read/written), SERVICE_PIDS (read),
#            SERVICE_PIDFILES (read), PIDFILE (read)
# Arguments: none
# Returns:   returns immediately when a shutdown is already in progress;
#            otherwise does not return (exits with status 0)
#######################################
cleanup() {
  # Re-entrancy guard: a second signal during shutdown must not restart it.
  if [[ -n "$SHUTTING_DOWN" ]]; then
    return
  fi
  SHUTTING_DOWN=1

  local svc pid polls_left any_alive

  timestamp_echo "Stopping all processes..."

  # Phase 1: ask every live service to terminate.
  for svc in "${!SERVICE_PIDS[@]}"; do
    pid="${SERVICE_PIDS[$svc]}"
    if kill -0 "$pid" 2>/dev/null; then
      kill "$pid"
    fi
  done

  # Phase 2: wait up to ~1 second (10 polls of 0.1 s), leaving early as soon
  # as nothing is alive instead of always sleeping the full second.
  polls_left=10
  while (( polls_left > 0 )); do
    any_alive=0
    for svc in "${!SERVICE_PIDS[@]}"; do
      if kill -0 "${SERVICE_PIDS[$svc]}" 2>/dev/null; then
        any_alive=1
        break
      fi
    done
    (( any_alive )) || break
    sleep 0.1
    polls_left=$(( polls_left - 1 ))
  done

  # Phase 3: force-kill whatever ignored SIGTERM.
  for svc in "${!SERVICE_PIDS[@]}"; do
    pid="${SERVICE_PIDS[$svc]}"
    if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid"
    fi
  done

  # Remove the per-service PID records so they cannot be mistaken for live
  # services by a later run.
  for svc in "${!SERVICE_PIDFILES[@]}"; do
    rm -f -- "${SERVICE_PIDFILES[$svc]}"
  done

  timestamp_echo "All processes stopped."
  rm -f "$PIDFILE"
  exit 0
}
|
# Shared body of the signal handlers: log which signal arrived, then run the
# shutdown routine.
_shutdown_on_signal() {
  local signal_name="$1"
  timestamp_echo "Caught ${signal_name}, shutting down..."
  cleanup
}

# SIGTERM handler.
terminate() {
  _shutdown_on_signal "SIGTERM"
}

# SIGINT (Ctrl-C) handler.
interrupt() {
  _shutdown_on_signal "SIGINT"
}

trap interrupt SIGINT
trap terminate SIGTERM
|
|
|
|
|
|
|
|
|
#######################################
# Launch one service in the background and record its PID.
# stdout and stderr are appended to the service's configured log files. If
# the process is still alive 0.2 s after launch, its PID is also written to
# the service's PID file.
# Globals:   SERVICE_CMDS, SERVICE_LOGS_STDOUT, SERVICE_LOGS_STDERR,
#            SERVICE_PIDFILES, SERVICE_RESTART_COUNT (read);
#            SERVICE_PIDS (written)
# Arguments: $1 - service name (a key of SERVICE_CMDS)
# Returns:   1 if the name is empty or has no configured command;
#            otherwise the status of the final command
#######################################
start_service() {
  local svc="${1:-}"
  if [[ -z "$svc" ]]; then
    timestamp_echo "start_service: missing service name." >&2
    return 1
  fi

  local cmd="${SERVICE_CMDS[$svc]}"
  local out="${SERVICE_LOGS_STDOUT[$svc]}"
  local err="${SERVICE_LOGS_STDERR[$svc]}"
  local -a argv=()

  if [[ -z "$cmd" ]]; then
    timestamp_echo "No command configured for service '$svc'." >&2
    return 1
  fi

  timestamp_echo "Starting $svc (restart count ${SERVICE_RESTART_COUNT[$svc]})"

  # Split the command string on whitespace into an argument vector. Unlike an
  # unquoted expansion, this does not perform pathname expansion, so
  # arguments containing '*', '?' or '[' are passed through literally.
  IFS=$' \t' read -r -a argv <<< "$cmd"

  "${argv[@]}" >>"$out" 2>>"$err" &
  SERVICE_PIDS[$svc]=$!

  # Give the process a moment so an immediate exit can be detected.
  sleep 0.2

  if ! kill -0 "${SERVICE_PIDS[$svc]}" 2>/dev/null; then
    timestamp_echo "$svc appears to have daemonized or exited immediately."
  else
    timestamp_echo "$svc started with PID ${SERVICE_PIDS[$svc]}"
    echo "${SERVICE_PIDS[$svc]}" > "${SERVICE_PIDFILES[$svc]}"
  fi
}
|
|
|
|
|
|
|
|
|
#######################################
# Count a restart for a service and relaunch it, unless the service has
# already used up its restart budget, in which case everything is shut down.
# Globals:   SERVICE_RESTART_COUNT (read/written), MAX_RESTARTS (read)
# Arguments: $1 - service name
#######################################
attempt_restart() {
  local svc="$1"
  local attempts=$(( SERVICE_RESTART_COUNT[$svc] + 1 ))

  SERVICE_RESTART_COUNT[$svc]=$attempts

  # Still within budget: relaunch and hand back the launcher's status.
  if (( attempts <= MAX_RESTARTS )); then
    start_service "$svc"
    return
  fi

  timestamp_echo "$svc crashed too many times. Shutting everything down."
  cleanup
}
|
|
|
|
|
|
|
|
|
#######################################
# Poll every tracked service every two seconds, forever, and request a
# restart for any service whose recorded PID no longer exists.
# Globals:   SERVICE_PIDS (read)
# Arguments: none
# Returns:   never returns on its own
#######################################
monitor_services() {
  local svc pid

  while true; do
    # Sleep in the background and block in `wait`. Bash defers trap handlers
    # until a foreground command finishes, but `wait` is interrupted right
    # away, so SIGTERM/SIGINT are handled immediately instead of up to two
    # seconds late.
    sleep 2 &
    wait $!

    for svc in "${!SERVICE_PIDS[@]}"; do
      pid="${SERVICE_PIDS[$svc]}"

      if ! kill -0 "$pid" 2>/dev/null; then
        timestamp_echo "$svc (PID $pid) not alive! Attempting restart..."
        attempt_restart "$svc"
      fi
    done
  done
}
|
|
|
#######################################
# Entry point: launch every configured service with a fresh restart counter,
# then hand control to the monitoring loop.
# Globals:   SERVICE_CMDS (read), SERVICE_RESTART_COUNT (written)
# Arguments: none
#######################################
main() {
  local name

  for name in "${!SERVICE_CMDS[@]}"; do
    # Counters start from zero on every script run.
    SERVICE_RESTART_COUNT[$name]=0
    start_service "$name"
  done

  monitor_services
}

main
|
|