#!/bin/bash
# =====================================================
# ERP GENERIC - Health Check Script
# Comprehensive system health check
# =====================================================

# Strict mode: stop on command errors, unset variables, and failed pipeline
# stages.
set -euo pipefail

# Configuration
# Every setting below can be overridden from the environment; the value after
# the operator is the fallback used when the variable is unset or empty.

# HTTP endpoints that get probed.
: "${BACKEND_URL:=http://localhost:3000}"
: "${FRONTEND_URL:=http://localhost:5173}"

# PostgreSQL settings, read from the POSTGRES_* variables.
DB_HOST="${POSTGRES_HOST:-postgres}"
DB_PORT="${POSTGRES_PORT:-5432}"
DB_NAME="${POSTGRES_DB:-erp_generic}"
DB_USER="${POSTGRES_USER:-erp_user}"

# Redis settings; an empty password means no authentication is attempted.
: "${REDIS_HOST:=redis}"
: "${REDIS_PORT:=6379}"
: "${REDIS_PASSWORD:=}"

# Colors for output. The escapes are stored as literal backslash sequences and
# are only turned into control characters when they are printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Exit code (0 = healthy, 1 = unhealthy)
EXIT_CODE=0

# Helper functions

#######################################
# Print a section banner: an empty line, a rule, the title, and a second rule.
# Arguments: $1 - section title, printed verbatim
# Outputs:   four lines on stdout
#######################################
print_header() {
  local rule="=========================================="
  # printf '%s' instead of echo: a title such as "-n" or "-e" is printed
  # literally rather than being consumed as an echo option.
  printf '\n%s\n%s\n%s\n' "$rule" "$1" "$rule"
}

#######################################
# Report a passing check as a green "✓ <message>" line.
# Globals:   GREEN, NC (read) - color escape sequences
# Arguments: $1 - message, printed verbatim
# Outputs:   one line on stdout
#######################################
check_success() {
  # %b expands the backslash escapes stored in the color variables; the
  # message goes through %s so a backslash inside it (e.g. "\n" in a path or
  # command output) is not reinterpreted the way `echo -e` would.
  printf '%b✓ %s%b\n' "${GREEN:-}" "$1" "${NC:-}"
}

#######################################
# Report a non-fatal finding as a yellow "⚠ <message>" line.
# Does not change EXIT_CODE.
# Globals:   YELLOW, NC (read) - color escape sequences
# Arguments: $1 - message, printed verbatim
# Outputs:   one line on stdout
#######################################
check_warning() {
  # Colors are expanded by %b; the message is passed through %s so that
  # backslashes inside it are printed as-is instead of being interpreted.
  printf '%b⚠ %s%b\n' "${YELLOW:-}" "$1" "${NC:-}"
}

#######################################
# Report a failed check as a red "✗ <message>" line and mark the whole run
# as unhealthy.
# Globals:   RED, NC (read) - color escape sequences
#            EXIT_CODE (written) - set to 1
# Arguments: $1 - message, printed verbatim
# Outputs:   one line on stdout
#######################################
check_failure() {
  # Colors are expanded by %b; the message is passed through %s so that
  # backslashes inside it are printed as-is instead of being interpreted.
  printf '%b✗ %s%b\n' "${RED:-}" "$1" "${NC:-}"
  EXIT_CODE=1
}

# Main health check

# Opening banner: title, start timestamp, then one empty line.
printf '%s\n' "===== ERP Generic Health Check ====="
printf 'Started at: %s\n\n' "$(date)"

# 1. Backend API Health Check
print_header "1. Backend API"

# Fetch /health exactly once. The same response proves liveness and supplies
# the detailed status, so the two can never disagree, and a failed request is
# handled by the `if` instead of terminating the script under `set -e` (which
# an unguarded second `curl` inside a command substitution would do).
# -f makes curl fail on HTTP error statuses; --max-time bounds the wait to 5s.
if HEALTH_RESPONSE=$(curl -f -s --max-time 5 "${BACKEND_URL}/health" 2> /dev/null); then
  check_success "Backend API is responding"
  printf 'Response: %s\n' "$HEALTH_RESPONSE"

  # The per-dependency breakdown is parsed with jq; without jq only liveness
  # can be verified, which is reported rather than silently skipped.
  if command -v jq &> /dev/null; then
    # `// "unknown"` covers a missing key; `|| echo` covers a non-JSON body.
    DB_STATUS=$(jq -r '.checks.database.status // "unknown"' <<< "$HEALTH_RESPONSE" 2> /dev/null || echo "unknown")
    REDIS_STATUS=$(jq -r '.checks.redis.status // "unknown"' <<< "$HEALTH_RESPONSE" 2> /dev/null || echo "unknown")

    if [[ "$DB_STATUS" == "up" ]]; then
      check_success "Database connection: OK"
    else
      check_failure "Database connection: FAILED"
    fi

    if [[ "$REDIS_STATUS" == "up" ]]; then
      check_success "Redis connection: OK"
    else
      check_failure "Redis connection: FAILED"
    fi
  else
    check_warning "jq is not installed; skipping detailed backend status"
  fi
else
  check_failure "Backend API is not responding"
fi

# 2. Frontend Health Check
print_header "2. Frontend"

# Probe the frontend URL and discard the body. curl's -f turns HTTP error
# statuses into a non-zero exit and --max-time caps the request at 5 seconds,
# so any of those outcomes lands in the failure branch.
if ! curl -f -s --max-time 5 "${FRONTEND_URL}" > /dev/null 2>&1; then
  check_failure "Frontend is not accessible"
else
  check_success "Frontend is accessible"
fi

# 3. PostgreSQL Database Check
print_header "3. PostgreSQL Database"

# psql reads the password from PGPASSWORD. Default to empty so that an unset
# POSTGRES_PASSWORD does not abort the whole script under `set -u`.
PGPASSWORD="${POSTGRES_PASSWORD:-}"
export PGPASSWORD

#######################################
# Run one SQL statement against $DB_NAME and print the first output line if it
# is a non-negative integer; otherwise (query failed, empty or unexpected
# output) print the fallback.
# Globals:   DB_HOST, DB_PORT, DB_USER, DB_NAME (read)
# Arguments: $1 - SQL text
#            $2 - fallback value
# Outputs:   one integer line on stdout
#######################################
pg_int_query() {
  local sql=$1 fallback=$2 value
  value=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -tAc "$sql" 2> /dev/null) || value=""
  value=${value%%$'\n'*}
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$value"
  else
    printf '%s\n' "$fallback"
  fi
}

if pg_isready -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" > /dev/null 2>&1; then
  check_success "PostgreSQL is accepting connections"

  # Check database exists: exact comparison against the first column of the
  # unaligned database list. awk reads all of its input, so psql is never cut
  # off by SIGPIPE (which an early-exiting `grep -q` can cause under pipefail),
  # and a name like "erp" can no longer match "erp-prod" at a word boundary.
  if psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -lqtA \
    | awk -F'|' -v db="$DB_NAME" '$1 == db { found = 1 } END { exit !found }'; then
    check_success "Database '$DB_NAME' exists"

    # Check connection count. The queries use current_database() (the session
    # is opened with -d "$DB_NAME") so no shell value is spliced into SQL.
    CONNECTION_COUNT=$(pg_int_query "SELECT count(*) FROM pg_stat_activity WHERE datname = current_database();" 0)
    MAX_CONNECTIONS=$(pg_int_query "SHOW max_connections;" 100)

    echo "Active connections: $CONNECTION_COUNT / $MAX_CONNECTIONS"

    if ((CONNECTION_COUNT > MAX_CONNECTIONS * 80 / 100)); then
      check_warning "Connection pool is >80% utilized"
    else
      check_success "Connection pool is healthy"
    fi

    # Check table count (everything outside the two system schemas).
    TABLE_COUNT=$(pg_int_query "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema NOT IN ('pg_catalog', 'information_schema');" 0)
    echo "Tables in database: $TABLE_COUNT"

    if ((TABLE_COUNT > 0)); then
      check_success "Database has tables"
    else
      check_warning "Database has no tables (not initialized?)"
    fi

    # Deadlock counter from pg_stat_database. This is the value recorded in
    # the statistics view, not a count limited to a recent time window.
    DEADLOCK_COUNT=$(pg_int_query "SELECT deadlocks FROM pg_stat_database WHERE datname = current_database();" 0)
    if ((DEADLOCK_COUNT > 0)); then
      check_warning "Database has $DEADLOCK_COUNT deadlocks recorded"
    fi
  else
    check_failure "Database '$DB_NAME' does not exist"
  fi
else
  check_failure "PostgreSQL is not accepting connections"
fi

# 4. Redis Cache Check
print_header "4. Redis Cache"

# Build the command as an array so host and port are always passed as single
# arguments; a string that is later word-split breaks on spaces or glob
# characters.
REDIS_CLI_CMD=(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT")

# Hand the password over via the REDISCLI_AUTH environment variable instead of
# `-a`, which exposes it in the process list.
# NOTE(review): confirm the deployed redis-cli honours REDISCLI_AUTH.
if [[ -n "$REDIS_PASSWORD" ]]; then
  export REDISCLI_AUTH="$REDIS_PASSWORD"
fi

#######################################
# Print the value of one key from `redis-cli INFO <section>`.
# Prints nothing when the key is absent or the command fails; never returns
# non-zero, so a missing field cannot abort the script under set -e/pipefail.
# Globals:   REDIS_CLI_CMD (read)
# Arguments: $1 - INFO section name
#            $2 - exact key to look up
# Outputs:   the value (carriage return stripped) on stdout
#######################################
redis_info_field() {
  local section=$1 key=$2
  "${REDIS_CLI_CMD[@]}" INFO "$section" 2> /dev/null \
    | awk -F: -v key="$key" '$1 == key { sub(/\r$/, "", $2); print $2 }' \
    || true
}

# Compare the reply text instead of trusting the exit status alone.
# NOTE(review): some redis-cli versions appear to exit 0 even when the server
# answers with an error reply such as an authentication failure - confirm.
if [[ "$("${REDIS_CLI_CMD[@]}" ping 2> /dev/null)" == "PONG" ]]; then
  check_success "Redis is responding"

  # Check Redis info
  REDIS_VERSION=$(redis_info_field server redis_version)
  echo "Redis version: $REDIS_VERSION"

  CONNECTED_CLIENTS=$(redis_info_field clients connected_clients)
  echo "Connected clients: $CONNECTED_CLIENTS"

  USED_MEMORY=$(redis_info_field memory used_memory_human)
  echo "Used memory: $USED_MEMORY"

  check_success "Redis is healthy"
else
  check_failure "Redis is not responding"
fi

# 5. Disk Space Check
print_header "5. Disk Space"

# `df -P` selects the POSIX output format, which keeps each filesystem on a
# single line, so row 2 / column 5 is reliably the use percentage of "/".
# (The default format may wrap a long device name onto its own line.)
DISK_USAGE=$(df -P / 2> /dev/null | awk 'NR == 2 { gsub(/%/, "", $5); print $5 }') || DISK_USAGE=""

if [[ "$DISK_USAGE" =~ ^[0-9]+$ ]]; then
  echo "Disk usage: ${DISK_USAGE}%"

  if ((DISK_USAGE < 70)); then
    check_success "Disk space is healthy"
  elif ((DISK_USAGE < 85)); then
    check_warning "Disk space is >70% utilized"
  else
    check_failure "Disk space is critically low (>85%)"
  fi
else
  # A value that is not a plain integer used to fall through every numeric
  # test and end up reported as "critically low"; report it for what it is.
  check_warning "Disk usage could not be determined"
fi

# 6. CPU & Memory Check
print_header "6. System Resources"

if command -v free &> /dev/null; then
  # Second row of `free`: column 2 is taken as total and column 3 as used.
  # The `$2 > 0` guard keeps awk from dividing by zero on unexpected output.
  MEMORY_USAGE=$(LC_ALL=C free 2> /dev/null \
    | awk 'NR == 2 && $2 > 0 { printf "%.0f", $3 * 100 / $2 }') || MEMORY_USAGE=""

  if [[ "$MEMORY_USAGE" =~ ^[0-9]+$ ]]; then
    echo "Memory usage: ${MEMORY_USAGE}%"

    if ((MEMORY_USAGE < 80)); then
      check_success "Memory usage is healthy"
    else
      check_warning "Memory usage is high (>80%)"
    fi
  else
    check_warning "Memory usage could not be determined"
  fi
fi

if command -v top &> /dev/null; then
  # Derive overall CPU usage as 100 minus the idle figure on top's "Cpu(s)"
  # summary line. Taking the second field of that line yields only the first
  # listed component (user time), which understates load from system time,
  # I/O wait, etc. LC_ALL=C forces a "." decimal separator. Matching the
  # "<number> id" / "<number>%id" token works for both spellings.
  # The awk program reads all input (no early exit) and a failure anywhere in
  # the pipeline leaves CPU_USAGE empty instead of aborting under set -e.
  CPU_USAGE=$(LC_ALL=C top -bn1 2> /dev/null \
    | awk '!seen && /Cpu\(s\)/ && match($0, /[0-9]+(\.[0-9]+)?[ %]*id/) {
        printf "%.1f", 100 - substr($0, RSTART, RLENGTH)
        seen = 1
      }') || CPU_USAGE=""

  decimal_re='^[0-9]+(\.[0-9]+)?$'
  if [[ "$CPU_USAGE" =~ $decimal_re ]]; then
    echo "CPU usage: ${CPU_USAGE}%"

    # Note: This is a simple check, may need adjustment based on system.
    # The value is handed to awk with -v (never spliced into the program
    # text) because bash arithmetic cannot compare fractional numbers.
    if awk -v usage="$CPU_USAGE" 'BEGIN { exit !(usage < 80) }'; then
      check_success "CPU usage is healthy"
    else
      check_warning "CPU usage is high (>80%)"
    fi
  else
    check_warning "CPU usage could not be determined"
  fi
fi

# 7. Docker Containers Check (if applicable)
print_header "7. Docker Containers"

if command -v docker &> /dev/null; then
  # Guard `docker ps` with `if`: when the CLI is installed but the command
  # fails (daemon not running, no permission on the socket), an unguarded
  # `$(docker ps | wc -l)` fails through pipefail and `set -e` then ends the
  # script before the remaining checks and the summary run.
  if CONTAINER_NAMES=$(docker ps --filter "name=erp" --format "{{.Names}}" 2> /dev/null); then
    # Count non-empty lines; grep -c prints 0 but exits 1 when nothing
    # matches, hence the `|| true`.
    RUNNING_CONTAINERS=$(grep -c . <<< "$CONTAINER_NAMES" || true)
    echo "Running ERP containers: $RUNNING_CONTAINERS"

    if ((RUNNING_CONTAINERS > 0)); then
      check_success "Docker containers are running"

      # List container status (informational only, so a failure is ignored).
      docker ps --filter "name=erp" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" || true
    else
      check_warning "No ERP containers are running"
    fi
  else
    check_warning "Docker is installed but 'docker ps' failed (daemon down or no permission?)"
  fi
else
  echo "Docker is not installed or not in PATH"
fi

# 8. Log Errors Check
# Counts every line containing "ERROR" in the backend log file. No time window
# is applied, so the figure covers the whole file, not just the last hour.
print_header "8. Recent Errors in Logs"

# Log location; BACKEND_LOG_FILE may override the default path.
LOG_FILE="${BACKEND_LOG_FILE:-/var/log/erp-generic/backend.log}"

if [[ -f "$LOG_FILE" ]]; then
  # `grep -c` prints "0" AND exits 1 when nothing matches. Appending
  # `|| echo "0"` inside the substitution therefore produced the two-line value
  # "0<newline>0", which failed every integer test and reported an error-free
  # log as a failure. Keep grep's own output and only neutralise its status.
  ERROR_COUNT=$(grep -c "ERROR" "$LOG_FILE" 2> /dev/null) || true
  # grep printed nothing usable (e.g. the file is unreadable): treat as 0.
  [[ "$ERROR_COUNT" =~ ^[0-9]+$ ]] || ERROR_COUNT=0

  echo "Errors in last log file: $ERROR_COUNT"

  if ((ERROR_COUNT == 0)); then
    check_success "No errors in recent logs"
  elif ((ERROR_COUNT < 10)); then
    check_warning "Found $ERROR_COUNT errors in logs"
  else
    check_failure "Found $ERROR_COUNT errors in logs (investigate!)"
  fi
else
  echo "Log file not found: $LOG_FILE"
fi

# Summary
print_header "Health Check Summary"

# EXIT_CODE starts at 0 and is only ever raised to 1 by check_failure, so a
# non-zero value here means at least one check failed.
if ((EXIT_CODE != 0)); then
  echo -e "${RED}✗ Some systems are unhealthy (see above)${NC}"
else
  echo -e "${GREEN}✓ All systems are healthy${NC}"
fi

# Closing lines: blank separator, end timestamp, footer.
printf '\nCompleted at: %s\n' "$(date)"
printf '%s\n' "===== End of Health Check ====="

# Exit with appropriate code (0 = healthy, 1 = unhealthy)
exit "$EXIT_CODE"