|
#!/bin/bash
#
# Advanced development monitor with automatic cleanup.
# Monitors the bot container and shuts down all services if it fails.
#
# Optional environment overrides (defaults in parentheses):
#   BOT_CONTAINER        (tux)  name of the container to watch
#   MAX_RESTART_ATTEMPTS (3)    failed checks in a row that trigger shutdown
#   RESTART_DELAY        (5)    seconds to wait before restarting services
#   MONITOR_INTERVAL     (10)   seconds between health checks
set -euo pipefail

echo "🚀 Starting Tux Development Monitor"
echo "===================================="

# Configuration: every value keeps its previous default and can be
# overridden from the environment.
BOT_CONTAINER="${BOT_CONTAINER:-tux}"
MAX_RESTART_ATTEMPTS="${MAX_RESTART_ATTEMPTS:-3}"
RESTART_DELAY="${RESTART_DELAY:-5}"
MONITOR_INTERVAL="${MONITOR_INTERVAL:-10}"

# The numeric settings feed integer comparisons and sleep, so reject
# anything that is not a plain non-negative integer up front.
for cfg_name in MAX_RESTART_ATTEMPTS RESTART_DELAY MONITOR_INTERVAL; do
  if ! [[ "${!cfg_name}" =~ ^[0-9]+$ ]]; then
    echo "❌ ${cfg_name} must be a non-negative integer (got '${!cfg_name}')" >&2
    exit 1
  fi
done
unset cfg_name
| 15 | + |
#######################################
# Take the whole Compose stack down.
# Outputs: a blank line, a progress message and a completion message
#          on stdout.
#######################################
cleanup() {
  printf '\n'
  printf '%s\n' "🧹 Cleaning up all services..."
  docker compose down
  printf '%s\n' "✅ Cleanup complete"
}
| 23 | + |
#######################################
# Report whether the bot container is currently running.
# Globals:  BOT_CONTAINER (read)
# Outputs:  one status line on stdout
# Returns:  0 when the container state is "running", 1 for any other
#           state or when the container cannot be inspected
#######################################
check_bot_health() {
  local state
  local code

  # When `docker inspect` fails, fall back to placeholder values so the
  # dispatch below still has something to match on.
  state=$(docker inspect --format='{{.State.Status}}' "$BOT_CONTAINER" 2>/dev/null || echo "not_found")
  code=$(docker inspect --format='{{.State.ExitCode}}' "$BOT_CONTAINER" 2>/dev/null || echo "0")

  case "$state" in
    running)
      echo "✅ Bot container is running"
      return 0
      ;;
    not_found)
      echo "❌ Bot container not found"
      ;;
    exited)
      echo "❌ Bot container exited with code: $code"
      ;;
    *)
      echo "⚠️ Bot container status: $state"
      ;;
  esac

  # Every state other than "running" counts as unhealthy.
  return 1
}
| 43 | + |
#######################################
# Bring the Compose stack up in detached mode and wait for the bot.
# Polls check_bot_health up to 30 times, sleeping 2 seconds after each
# failed check.
# Outputs:  progress and result messages on stdout
# Returns:  0 once the bot reports healthy; 1 if `docker compose up -d`
#           fails or the bot is still unhealthy after the last poll
#######################################
start_services() {
  local tries

  echo "⏳ Starting services..."
  docker compose up -d || {
    echo "❌ Failed to start services"
    return 1
  }

  # Give the bot time to come up before declaring failure.
  echo "⏳ Waiting for bot to start..."
  for ((tries = 0; tries < 30; tries++)); do
    if check_bot_health; then
      echo "✅ Bot started successfully"
      return 0
    fi
    sleep 2
  done

  echo "❌ Bot failed to start within timeout"
  return 1
}
| 67 | + |
# Run cleanup whenever the script exits.
# INT and TERM only call `exit`: Bash resumes the script after a signal
# handler returns, so a handler that merely ran cleanup would tear the
# stack down and then fall back into the monitor loop. Exiting with the
# conventional 128+signal status hands teardown to the EXIT trap instead.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
| 70 | + |
# Bring the stack up once before monitoring; abort if it does not start.
start_services || {
  echo "❌ Failed to start services"
  exit 1
}
| 76 | + |
# Monitor loop: check the bot every MONITOR_INTERVAL seconds and restart
# the services when a check fails, giving up once MAX_RESTART_ATTEMPTS
# failed checks have accumulated without a healthy check in between.
# Teardown is left to the EXIT trap installed earlier in this script;
# calling cleanup here as well would run `docker compose down` twice.
echo "👀 Starting monitor loop..."
restart_attempts=0

while true; do
  if check_bot_health; then
    # Reset restart counter on successful health check
    restart_attempts=0
  else
    restart_attempts=$((restart_attempts + 1))
    echo "⚠️ Bot failure detected (attempt $restart_attempts/$MAX_RESTART_ATTEMPTS)"

    if [ "$restart_attempts" -ge "$MAX_RESTART_ATTEMPTS" ]; then
      echo "❌ Maximum restart attempts reached. Shutting down all services."
      exit 1
    fi

    echo "🔄 Restarting services in ${RESTART_DELAY} seconds..."
    sleep "$RESTART_DELAY"

    if ! start_services; then
      echo "❌ Failed to restart services"
      exit 1
    fi
  fi

  sleep "$MONITOR_INTERVAL"
done
0 commit comments