XuqmGroup-PrivateDeploy/scripts/healthcheck.sh

196 行
6.8 KiB
Bash

#!/usr/bin/env bash
# Deployment health check.
#
# Runs a sequence of checks (docker daemon, compose containers, MySQL, Redis,
# HTTP endpoints, disk usage), collects one JSON object per check and writes a
# summary report to $ROOT_DIR/.deploy-state/last-healthcheck.json.
#
# Abort on unhandled command failures, unset variables and failed pipeline
# stages.
set -euo pipefail
# ROOT_DIR is the parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Shared helpers. NOTE(review): load_env, audit, progress, compose, now and
# fail_json are not defined in this file and are presumably provided by
# lib.sh — confirm there.
. "$ROOT_DIR/scripts/lib.sh"
load_env
audit "healthcheck" "STARTED" "running checks"
progress "healthcheck" "STARTED" "running checks"
# Overall verdict; check_fail switches it to "DOWN".
OVERALL="UP"
# One JSON object string per recorded check, in execution order.
RESULTS=()
# Names of the checks that were recorded with status WARN.
WARNINGS=()
#######################################
# Escape a string so it can be embedded inside a JSON double-quoted value.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
_hc_json_escape() {
  local s=$1
  # Backslashes first, otherwise the escapes added below would be re-escaped.
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

#######################################
# Append one check result to RESULTS as a JSON object string.
# Globals:   RESULTS (appended)
# Arguments: $1 - status label, $2 - check name, $3 - human-readable detail
#######################################
_hc_record() {
  local status=$1
  local name detail
  name="$(_hc_json_escape "$2")"
  detail="$(_hc_json_escape "$3")"
  RESULTS+=("{\"check\":\"$name\",\"status\":\"$status\",\"detail\":\"$detail\"}")
}

# Result recorders. Each takes: $1 - check name, $2 - detail text.
# check_fail additionally marks the whole run as DOWN; check_warn additionally
# remembers the check name in WARNINGS.
check_pass() { _hc_record "OK" "$1" "$2"; }
check_fail() { _hc_record "FAIL" "$1" "$2"; OVERALL="DOWN"; }
check_warn() { _hc_record "WARN" "$1" "$2"; WARNINGS+=("$1"); }
check_skip() { _hc_record "SKIPPED" "$1" "$2"; }
# Docker daemon: `docker info` only succeeds when the client can reach it.
if ! docker info >/dev/null 2>&1; then
  check_fail "docker" "daemon not available"
else
  check_pass "docker" "daemon running"
fi
# Required service containers. Each one must exist and be in the "running"
# state; anything else is recorded as a failure. docs-site is deliberately not
# in this list (its image may not exist in all registries).
for svc in tenant-service file-service tenant-web ops-web nginx; do
  # STATE holds the first container id compose reports for the service.
  STATE="$(compose ps -q "$svc" 2>/dev/null | head -1 || true)"
  if [ -z "$STATE" ]; then
    check_fail "container.$svc" "not found"
    continue
  fi
  STATUS="$(docker inspect --format='{{.State.Status}}' "$STATE" 2>/dev/null || echo 'unknown')"
  case "$STATUS" in
    running) check_pass "container.$svc" "running" ;;
    *) check_fail "container.$svc" "status=$STATUS" ;;
  esac
done
# Optional service containers (docs-site included). A service with no
# container is not reported at all. A container that exists but is not in the
# "running" state is recorded through check_fail, exactly like a required one.
# NOTE(review): earlier wording described this case as "warn only"; the code
# has always called check_fail — confirm which severity is intended.
for svc in docs-site im-service push-service update-service license-service; do
  # STATE holds the first container id compose reports for the service.
  STATE="$(compose ps -q "$svc" 2>/dev/null | head -1 || true)"
  [ -n "$STATE" ] || continue
  STATUS="$(docker inspect --format='{{.State.Status}}' "$STATE" 2>/dev/null || echo 'unknown')"
  case "$STATUS" in
    running) check_pass "container.$svc" "running" ;;
    *) check_fail "container.$svc" "status=$STATUS" ;;
  esac
done
# MySQL connectivity
# Managed mode pings the server from inside its compose container; external
# mode only verifies that the TCP port accepts connections (needs nc).
MYSQL_HOST_VAL="${MYSQL_HOST:-127.0.0.1}"
MYSQL_PORT_VAL="${MYSQL_PORT:-3306}"
if [ "${MYSQL_MODE:-external}" = "managed" ]; then
  MYSQL_CTR="$(compose ps -q mysql 2>/dev/null | head -1 || true)"
  if [ -n "$MYSQL_CTR" ]; then
    # Build the probe as an array so the password option is present only when
    # a password is configured. With an empty value the option collapses to a
    # bare `-p`, which tells mysqladmin to prompt for a password instead.
    MYSQL_PING_CMD=(mysqladmin -u root)
    if [ -n "${MYSQL_ROOT_PASSWORD:-}" ]; then
      MYSQL_PING_CMD+=("-p${MYSQL_ROOT_PASSWORD}")
    fi
    MYSQL_PING_CMD+=(ping --silent)
    if docker exec "$MYSQL_CTR" "${MYSQL_PING_CMD[@]}" 2>/dev/null; then
      check_pass "mysql" "managed container healthy"
    else
      check_fail "mysql" "managed container ping failed"
    fi
  else
    check_fail "mysql" "managed container not running"
  fi
else
  if command -v nc >/dev/null 2>&1; then
    # -z: probe only, send no data; -w3: give up after three seconds.
    if nc -z -w3 "$MYSQL_HOST_VAL" "$MYSQL_PORT_VAL" 2>/dev/null; then
      check_pass "mysql" "tcp reachable at $MYSQL_HOST_VAL:$MYSQL_PORT_VAL"
    else
      check_fail "mysql" "tcp unreachable at $MYSQL_HOST_VAL:$MYSQL_PORT_VAL"
    fi
  else
    check_skip "mysql" "nc not available for TCP check"
  fi
fi
# Redis connectivity
# Managed mode sends PING from inside the compose container and expects PONG;
# external mode only verifies that the TCP port accepts connections (needs nc).
REDIS_HOST_VAL="${REDIS_HOST:-127.0.0.1}"
REDIS_PORT_VAL="${REDIS_PORT:-6379}"
if [ "${REDIS_MODE:-external}" = "managed" ]; then
  REDIS_CTR="$(compose ps -q redis 2>/dev/null | head -1 || true)"
  if [ -n "$REDIS_CTR" ]; then
    # Only authenticate when a password is configured: passing `-a ""` makes
    # redis-cli issue AUTH with an empty password, which a server without a
    # password rejects, so the probe would fail on a healthy instance.
    REDIS_PING_CMD=(redis-cli)
    if [ -n "${REDIS_PASSWORD:-}" ]; then
      REDIS_PING_CMD+=(-a "$REDIS_PASSWORD" --no-auth-warning)
    fi
    REDIS_PING_CMD+=(PING)
    if docker exec "$REDIS_CTR" "${REDIS_PING_CMD[@]}" 2>/dev/null | grep -q PONG; then
      check_pass "redis" "managed container healthy"
    else
      check_fail "redis" "managed container ping failed"
    fi
  else
    check_fail "redis" "managed container not running"
  fi
else
  if command -v nc >/dev/null 2>&1; then
    # -z: probe only, send no data; -w3: give up after three seconds.
    if nc -z -w3 "$REDIS_HOST_VAL" "$REDIS_PORT_VAL" 2>/dev/null; then
      check_pass "redis" "tcp reachable at $REDIS_HOST_VAL:$REDIS_PORT_VAL"
    else
      check_fail "redis" "tcp unreachable at $REDIS_HOST_VAL:$REDIS_PORT_VAL"
    fi
  else
    check_skip "redis" "nc not available for TCP check"
  fi
fi
# HTTP health endpoints
#######################################
# Probe a URL and record the outcome under the given check name.
# HTTP 200 or 204 is recorded as a pass, any other outcome as a warning, and
# a missing curl binary as a skip.
# Arguments: $1 - check name
#            $2 - URL to request (redirects are followed, TLS certificate
#                 verification is disabled, 5 second overall timeout)
# Returns:   status of the check_* helper that recorded the result
#######################################
http_check() {
  local name="$1"
  local url="$2"
  local code
  if ! command -v curl >/dev/null 2>&1; then
    check_skip "$name" "curl not available"
    return
  fi
  # curl still writes the -w value ("000") when the transfer fails, so the
  # fallback must replace the captured text rather than be echoed after it;
  # otherwise the two codes are concatenated into "000000".
  code="$(curl -skL -o /dev/null -w '%{http_code}' --max-time 5 "$url" 2>/dev/null)" || code="000"
  if [ "$code" = "200" ] || [ "$code" = "204" ]; then
    check_pass "$name" "HTTP $code from $url"
  else
    check_warn "$name" "HTTP $code from $url"
  fi
}
#######################################
# Print the host port that compose reports for nginx container port 80.
# Uses only the first reported mapping and takes the text after the last
# colon, so "0.0.0.0:8080" and "[::]:8080" both yield 8080.
# Outputs:   the port number, or nothing when it cannot be determined
# Returns:   always 0
#######################################
_hc_nginx_port() {
  local mapping port
  mapping="$(compose port nginx 80 2>/dev/null | head -1)" || return 0
  port="${mapping##*:}"
  # Reject anything that is not a plain number.
  case "$port" in
    '' | *[!0-9]*) return 0 ;;
  esac
  printf '%s' "$port"
}

# Resolve the base URL to probe: prefer CONSOLE_DOMAIN, fall back to localhost
CONSOLE_DOMAIN_VAL="${CONSOLE_DOMAIN:-}"
if [ -n "$CONSOLE_DOMAIN_VAL" ]; then
  # Drop one trailing slash so that appended paths do not start with "//".
  HTTP_BASE="${CONSOLE_DOMAIN_VAL%/}"
else
  # Derive port from nginx container if running; default to 80 otherwise.
  NGINX_PORT="$(_hc_nginx_port || true)"
  HTTP_BASE="http://localhost:${NGINX_PORT:-80}"
fi
#######################################
# Classify the body returned by the deployment status endpoint.
# Matching is tolerant of whitespace around the colon, so both compact and
# pretty-printed JSON are recognised.
# Arguments: $1 - response body (may be empty)
# Outputs:   "private" when mode is PRIVATE, "other" when a mode key is
#            present with a different value, "none" otherwise
#######################################
_hc_deploy_mode() {
  local body="$1"
  local private_re='"mode"[[:space:]]*:[[:space:]]*"PRIVATE"'
  if [[ "$body" =~ $private_re ]]; then
    printf 'private'
  elif [[ "$body" == *'"mode"'* ]]; then
    printf 'other'
  else
    printf 'none'
  fi
}

http_check "http.actuator" "${HTTP_BASE}/actuator/health"
http_check "http.tenant-web" "${HTTP_BASE}/"
# Verify private mode is active
if command -v curl >/dev/null 2>&1; then
  # A failed request yields an empty body, which is classified as "none".
  DEPLOY_STATUS="$(curl -skL --max-time 5 "${HTTP_BASE}/api/private/deployment/status" 2>/dev/null || true)"
  case "$(_hc_deploy_mode "$DEPLOY_STATUS")" in
    private)
      check_pass "private-mode" "PRIVATE mode confirmed"
      ;;
    other)
      check_warn "private-mode" "deployment/status returned non-PRIVATE mode"
      ;;
    *)
      check_warn "private-mode" "deployment/status unreachable or unexpected response"
      ;;
  esac
else
  # Record the gap explicitly, matching what http_check does without curl.
  check_skip "private-mode" "curl not available"
fi
# Optional service endpoints: each is probed only when its feature flag is
# exactly "true" and a domain is configured for it.
if [ "${ENABLE_IM:-false}" = "true" ] && [ -n "${IM_DOMAIN:-}" ]; then
  http_check "http.im-service" "${IM_DOMAIN}/actuator/health"
fi
if [ "${ENABLE_UPDATE:-false}" = "true" ] && [ -n "${UPDATE_DOMAIN:-}" ]; then
  http_check "http.update-service" "${UPDATE_DOMAIN}/actuator/health"
fi
if [ "${ENABLE_LICENSE:-false}" = "true" ] && [ -n "${LICENSE_DOMAIN:-}" ]; then
  http_check "http.license-service" "${LICENSE_DOMAIN}/actuator/health"
fi
#######################################
# Print the usage percentage (digits only) of the filesystem holding a path.
# Uses the POSIX output format (-P) so every filesystem is reported on a
# single line even when the device name is long.
# Arguments: $1 - path on the filesystem to inspect
# Outputs:   capacity column of the first data line with "%" removed
#######################################
_hc_disk_use_percent() {
  df -P -- "$1" 2>/dev/null | awk 'NR==2 { sub(/%/, "", $5); print $5 }'
}

# Disk space
# `|| true` keeps a df failure from aborting the whole run under set -e.
DISK_USE="$(_hc_disk_use_percent "$ROOT_DIR" || true)"
case "$DISK_USE" in
  '' | *[!0-9]*)
    # Nothing usable came back; say so instead of dropping the check silently
    # or feeding a non-number to the numeric comparisons below.
    check_skip "disk" "unable to determine usage for $ROOT_DIR"
    ;;
  *)
    if [ "$DISK_USE" -ge 90 ]; then
      check_fail "disk" "usage ${DISK_USE}% >= 90%"
    elif [ "$DISK_USE" -ge 85 ]; then
      check_warn "disk" "usage ${DISK_USE}% >= 85%"
    else
      check_pass "disk" "usage ${DISK_USE}%"
    fi
    ;;
esac
# Build JSON result
# Each array is printed one element per line, the lines are joined with commas
# by paste, and the joined text is wrapped in brackets.
RESULTS_JSON="[$(printf '%s\n' "${RESULTS[@]+"${RESULTS[@]}"}" | paste -sd ',' -)]"
if [ -z "${WARNINGS[*]:-}" ]; then
  # No warnings recorded: emit an empty array rather than [""].
  WARNINGS_JSON="[]"
else
  WARNINGS_JSON="[$(printf '"%s"\n' "${WARNINGS[@]}" | paste -sd ',' -)]"
fi
# Persist the report; the here-document expands variables and $(now) inline.
cat > "$ROOT_DIR/.deploy-state/last-healthcheck.json" <<EOF
{
"status": "$OVERALL",
"timestamp": "$(now)",
"version": "${PRIVATE_VERSION:-unknown}",
"mysqlMode": "${MYSQL_MODE:-external}",
"redisMode": "${REDIS_MODE:-external}",
"profiles": "${COMPOSE_PROFILES:-base}",
"checks": $RESULTS_JSON,
"warnings": $WARNINGS_JSON
}
EOF
# Record the final verdict through the shared audit/progress helpers.
audit "healthcheck" "$OVERALL" "checks=${#RESULTS[@]}"
progress "healthcheck" "$OVERALL" ""
# Print summary: a banner followed by one recorded result per line.
printf '\n=== Health Check: %s ===\n' "$OVERALL"
for entry in "${RESULTS[@]+"${RESULTS[@]}"}"; do
  printf ' %s\n' "$entry"
done
# Anything other than UP is reported through fail_json.
if [ "$OVERALL" != "UP" ]; then
  fail_json "XUQM_PRIVATE_4040" "health check failed; see .deploy-state/last-healthcheck.json" "healthcheck"
fi