#!/usr/bin/env bash
# update.sh — hot-update script for an XuqmGroup private deployment
#
# Steps, in order:
#    1. Check CONSOLE_DOMAIN and offer to correct it (bare IP → prompt for a public domain)
#    2. Repair SDK URLs (leftover public-platform addresses / bare IPs)
#    3. Sync the SDK bootstrap config file
#    4. Detect and auto-repair WebSocket proxy headers in the host nginx
#    5. Optionally pull the latest images
#    6. Restart the affected containers
#    7. Wait for tenant-service to become healthy
#    8. Auto-approve backlogged PENDING service-activation requests
#    9. WebSocket connectivity self-check
#   10. Full verification
#
# Prerequisite: install.sh has already completed the initial deployment.

set -euo pipefail

# ---------------------------------------------------------------------------
# Locate the installation directory
# ---------------------------------------------------------------------------
# Preference order: the parent of this script's directory, then a list of
# well-known install paths, and finally an interactive prompt.
_script_parent="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
if [ -f "$_script_parent/docker-compose.yml" ]; then
  ROOT_DIR="$_script_parent"
else
  ROOT_DIR=""
  for _d in /opt/xuqm-private /opt/xuqm /root/xuqm-private; do
    if [ -f "$_d/docker-compose.yml" ]; then
      ROOT_DIR="$_d"
      break
    fi
  done
  if [ -z "$ROOT_DIR" ]; then
    # `|| true`: at EOF (non-interactive run) read returns non-zero, which
    # would abort under `set -e` before the explicit error below is printed.
    read -rp " 请输入部署目录路径(如 /opt/xuqm-private): " ROOT_DIR || true
    [ -f "$ROOT_DIR/docker-compose.yml" ] || \
      { printf "\nERROR: %s 下未找到 docker-compose.yml\n" "$ROOT_DIR" >&2; exit 1; }
  fi
fi

# ---------------------------------------------------------------------------
# Utility functions
# ---------------------------------------------------------------------------
# ANSI escapes are kept as literal backslash sequences; printf interprets
# them because they are spliced into its format string.
BOLD='\033[1m'; RESET='\033[0m'
CYAN='\033[1;36m'; GREEN='\033[32m'; YELLOW='\033[33m'; RED='\033[1;31m'

# Output helpers. `fail` writes to stderr and terminates the script with 1.
log()  { printf "\n${CYAN}[update] %s${RESET}\n" "$*"; }
ok()   { printf " ${GREEN}✓${RESET} %s\n" "$*"; }
warn() { printf " ${YELLOW}⚠${RESET} %s\n" "$*"; }
fail() { printf "\n${RED}ERROR: %s${RESET}\n" "$*" >&2; exit 1; }
info() { printf " → %s\n" "$*"; }

#######################################
# Set KEY=VALUE in an env file: every existing `KEY=` line is replaced,
# otherwise the pair is appended (the file is created if missing).
# Arguments: $1 - env file path, $2 - key, $3 - value (written literally)
# Returns:   python3's exit status
#######################################
_set_env() {
  local file="$1" key="$2" val="$3"
  python3 - "$file" "$key" "$val" <<'PY'
import os
import re
import sys

path, key, val = sys.argv[1:4]

text = ''
if os.path.exists(path):
    with open(path, encoding='utf-8') as fh:
        text = fh.read()

entry = key + '=' + val
pattern = re.compile(r'^' + re.escape(key) + r'=.*$', re.MULTILINE)
if pattern.search(text):
    # A callable replacement keeps the value literal; a plain replacement
    # string would have its backslash escapes (\1, \g<0>, \n ...) expanded.
    text = pattern.sub(lambda _m: entry, text)
else:
    # Never glue the new key onto an unterminated last line.
    if text and not text.endswith('\n'):
        text += '\n'
    text += entry + '\n'

with open(path, 'w', encoding='utf-8') as fh:
    fh.write(text)
PY
}

#######################################
# Decide whether an http/https URL has to be rewritten.
# Arguments: $1 - URL to inspect
# Returns:   0 (needs fixing) when the URL is empty, mentions xuqinmin.com
#            (case-insensitive), or its host is a bare IPv4 address;
#            1 otherwise
#######################################
_url_needs_fix() {
  local val="$1"
  if [ -z "$val" ]; then
    return 0
  fi
  if printf '%s' "$val" | grep -qi 'xuqinmin\.com'; then
    return 0
  fi
  # Reduce the URL to its host: drop the scheme, then the path, then the port.
  local host
  host="$(printf '%s' "$val" | sed 's|https\?://||; s|/.*||; s|:.*||')"
  if printf '%s' "$host" | grep -qE '^([0-9]{1,3}\.){3}[0-9]{1,3}$'; then
    return 0
  fi
  return 1
}

# ---------------------------------------------------------------------------
# Banner
# ---------------------------------------------------------------------------
printf "\n${BOLD}══════════════════════════════════════════════════${RESET}\n"
printf "${BOLD} XuqmGroup 私有化部署热更新${RESET}\n"
printf "${BOLD}══════════════════════════════════════════════════${RESET}\n"
printf " 部署目录: %s\n\n" "$ROOT_DIR"

# ---------------------------------------------------------------------------
# Pre-flight checks
# ---------------------------------------------------------------------------
[ -f "$ROOT_DIR/.env" ] || fail "未找到 .env,请先执行 install.sh 完成初始部署"
[ -f "$ROOT_DIR/config/xuqm.env" ] || fail "未找到 config/xuqm.env,请先执行 install.sh 完成初始部署"
[ -f "$ROOT_DIR/config/secrets.env" ] || fail "未找到 config/secrets.env,请先执行 install.sh 完成初始部署"
command -v docker >/dev/null 2>&1 || fail "Docker 未安装"
command -v python3 >/dev/null 2>&1 || fail "python3 未安装"
docker info >/dev/null 2>&1 || fail "Docker daemon 未运行,请执行: systemctl start docker"

# ---------------------------------------------------------------------------
# Load the existing configuration
# ---------------------------------------------------------------------------
log "加载现有配置"
# `set -a` exports everything the two files define, so child processes
# (docker compose in particular) see the values.
set -a
# shellcheck disable=SC1090,SC1091
. "$ROOT_DIR/.env"
# shellcheck disable=SC1090,SC1091
. "$ROOT_DIR/config/secrets.env"
set +a

# `|| true`: with pipefail a missing key makes grep fail the whole pipeline,
# and `set -e` would then abort silently instead of reaching the explicit
# `fail` a few lines below.
_CONSOLE_DOMAIN="$(grep '^CONSOLE_DOMAIN=' "$ROOT_DIR/config/xuqm.env" 2>/dev/null \
  | cut -d= -f2- | tr -d '"' | tr -d "'" || true)"
_NGINX_BIND="${NGINX_BIND:-80}"
# NGINX_BIND may be "port" or "addr:port"; keep only what follows the last colon.
_NGINX_PORT="${_NGINX_BIND##*:}"

ok "CONSOLE_DOMAIN=${_CONSOLE_DOMAIN:-(未设置)}"
ok "NGINX_BIND=${_NGINX_BIND}"
[ -n "$_CONSOLE_DOMAIN" ] || fail "config/xuqm.env 中 CONSOLE_DOMAIN 未设置,请手动补充后重试"

# ---------------------------------------------------------------------------
# Step 1 — detect a bare-IP CONSOLE_DOMAIN and offer to correct it
# ---------------------------------------------------------------------------
log "检查 CONSOLE_DOMAIN"
_domain_host="$(printf '%s' "$_CONSOLE_DOMAIN" | sed 's|https\?://||; s|/.*||; s|:.*||')"
if printf '%s' "$_domain_host" | grep -qE '^([0-9]{1,3}\.){3}[0-9]{1,3}$'; then
  warn "CONSOLE_DOMAIN 当前为 IP 地址(${_CONSOLE_DOMAIN})"
  printf ' SDK 配置会把 IM WebSocket 地址设成该 IP,外网客户端无法通过域名 TLS 连接。\n'
  printf ' 请输入对外访问的公网域名(含协议,如 https://console.example.com)\n'
  _new_domain=""
  # `|| true`: EOF on stdin means "keep the current value", not "abort".
  read -rp " 新的 CONSOLE_DOMAIN(直接回车保持原值): " _new_domain || true
  # Every SDK URL below is derived from this value, so an entry without a
  # scheme would produce unusable addresses; reject it and keep the old one.
  if [ -n "$_new_domain" ] && [[ ! "$_new_domain" =~ ^https?://[^/] ]]; then
    warn "输入缺少 http:// 或 https:// 协议前缀,已忽略: ${_new_domain}"
    _new_domain=""
  fi
  if [ -n "$_new_domain" ]; then
    _set_env "$ROOT_DIR/config/xuqm.env" "CONSOLE_DOMAIN" "$_new_domain"
    _CONSOLE_DOMAIN="$_new_domain"
    ok "CONSOLE_DOMAIN 已更新 → ${_CONSOLE_DOMAIN}"
  else
    warn "保持原值 ${_CONSOLE_DOMAIN},如 SDK 无法连接请修改 config/xuqm.env 后重试"
  fi
else
  ok "CONSOLE_DOMAIN 正常: ${_CONSOLE_DOMAIN}"
fi

# ---------------------------------------------------------------------------
# Step 2 — repair the SDK URLs
# ---------------------------------------------------------------------------
log "检查并修复 SDK URL"
# The WebSocket scheme follows the console scheme: https → wss, anything else → ws.
if printf '%s' "$_CONSOLE_DOMAIN" | grep -q '^https://'; then
  _WS_SCHEME="wss"
else
  _WS_SCHEME="ws"
fi
# host[:port] of the console URL (scheme and path removed).
_DEPLOY_HOST="$(printf '%s' "$_CONSOLE_DOMAIN" | sed 's|https\?://||; s|/.*||')"
_SDK_IM_WS_URL="${_WS_SCHEME}://${_DEPLOY_HOST}/ws/im"
_SDK_IM_API_URL="${_CONSOLE_DOMAIN}"
_SDK_FILE_URL="${_CONSOLE_DOMAIN}"
_FIXED=0

# Currently configured SDK_IM_WS_URL (empty when the key is absent). The check
# that follows maps ws/wss to http/https so that _url_needs_fix can be reused.
_CURRENT_WS="$(grep '^SDK_IM_WS_URL=' "$ROOT_DIR/config/xuqm.env" 2>/dev/null | cut -d= -f2- || true)"
#######################################
# Rewrite one SDK URL key in config/xuqm.env when its value needs fixing.
# Globals:   ROOT_DIR (read), _FIXED (set to 1 when the key is rewritten)
# Arguments: $1 - key name
#            $2 - current value (used in the "already fine" message)
#            $3 - desired value
#            $4 - optional value handed to _url_needs_fix instead of $2
# Outputs:   one ok line describing the outcome
#######################################
_repair_sdk_url() {
  local key="$1" current="$2" desired="$3"
  local probe="${4-$2}"
  if _url_needs_fix "$probe"; then
    _set_env "$ROOT_DIR/config/xuqm.env" "$key" "$desired"
    ok "${key} 已更新 → ${desired}"
    _FIXED=1
  else
    ok "${key} 正常: ${current}"
  fi
}

# SDK_IM_WS_URL: map ws/wss to http/https so _url_needs_fix can inspect it.
_CURRENT_WS_HTTP="$(printf '%s' "$_CURRENT_WS" | sed 's|^wss://|https://|; s|^ws://|http://|')"
_repair_sdk_url "SDK_IM_WS_URL" "$_CURRENT_WS" "$_SDK_IM_WS_URL" "$_CURRENT_WS_HTTP"

_CURRENT_IM_API="$(grep '^SDK_IM_API_URL=' "$ROOT_DIR/config/xuqm.env" 2>/dev/null | cut -d= -f2- || true)"
_repair_sdk_url "SDK_IM_API_URL" "$_CURRENT_IM_API" "$_SDK_IM_API_URL"

_CURRENT_FILE="$(grep '^SDK_FILE_SERVICE_URL=' "$ROOT_DIR/config/xuqm.env" 2>/dev/null | cut -d= -f2- || true)"
_repair_sdk_url "SDK_FILE_SERVICE_URL" "$_CURRENT_FILE" "$_SDK_FILE_URL"

# Remove a leftover OPS_DOMAIN entry from .env
if grep -q '^OPS_DOMAIN=' "$ROOT_DIR/.env" 2>/dev/null; then
  python3 - "$ROOT_DIR/.env" <<'PY'
import re
import sys

path = sys.argv[1]
with open(path, encoding='utf-8') as fh:
    content = fh.read()
content = re.sub(r'^OPS_DOMAIN=.*\n?', '', content, flags=re.MULTILINE)
with open(path, 'w', encoding='utf-8') as fh:
    fh.write(content)
PY
  ok ".env 中已清理 OPS_DOMAIN"
  _FIXED=1
fi

# Report any other xuqinmin.com leftovers for manual review
if grep -qi 'xuqinmin\.com' "$ROOT_DIR/config/xuqm.env" 2>/dev/null; then
  warn "config/xuqm.env 中仍有 xuqinmin.com 字样,请人工核查:"
  # The listing must be case-insensitive like the test above: otherwise a
  # case-only match makes grep exit 1 and pipefail + set -e abort the script.
  while IFS= read -r _line; do
    printf ' %s\n' "$_line"
  done < <(grep -ni 'xuqinmin\.com' "$ROOT_DIR/config/xuqm.env" || true)
fi

if [ "$_FIXED" -eq 0 ]; then
  ok "SDK 配置无需修复"
fi

# ---------------------------------------------------------------------------
# Step 3 — sync the SDK bootstrap config file
# ---------------------------------------------------------------------------
if [ -f "$ROOT_DIR/config/sdk/xuqm-private-sdk.json" ]; then
  # The Python step exits non-zero on any error so that success is only
  # reported when the file was really rewritten.
  if python3 - "$ROOT_DIR/config/sdk/xuqm-private-sdk.json" \
    "$_CONSOLE_DOMAIN" "$_SDK_IM_WS_URL" <<'PY'
import json
import sys

path, console, ws = sys.argv[1:4]
try:
    with open(path, encoding='utf-8') as fh:
        cfg = json.load(fh)
    cfg['controlBaseUrl'] = console
    cfg['fileBaseUrl'] = console
    cfg['imApiBaseUrl'] = console
    cfg['imWsUrl'] = ws
    # Serialise before opening for writing: a failure must not leave a
    # truncated file behind.
    rendered = json.dumps(cfg, ensure_ascii=False, indent=2)
    with open(path, 'w', encoding='utf-8') as fh:
        fh.write(rendered)
except Exception as e:
    print(f'warning: {e}', file=sys.stderr)
    sys.exit(1)
PY
  then
    ok "config/sdk/xuqm-private-sdk.json 已同步"
  else
    warn "config/sdk/xuqm-private-sdk.json 同步失败,请人工核查"
  fi
fi

# ---------------------------------------------------------------------------
# Step 4 — detect and repair WebSocket proxy headers in the host nginx
# ---------------------------------------------------------------------------
log "检查宿主机 nginx WebSocket 配置"

if ! command -v nginx >/dev/null 2>&1; then
  info "宿主机未安装 nginx,跳过(流量直接进入容器 nginx)"
else
  # Output protocol of the embedded script (stdout):
  #   zero or more "WARN:<text>" lines, then exactly one final status line
  #   starting with FIXED: / OK: / NONE: / SKIP: / FAIL:
  # This step is best-effort: a crash of the helper is downgraded to SKIP
  # instead of aborting the whole update through `set -e`.
  _nginx_result="$(python3 - "$_NGINX_PORT" <<'PYEOF'
import os
import re
import shutil
import subprocess
import sys

port = sys.argv[1]
HEADER = '# configuration file '


def run(cmd, timeout=None):
    # PIPE + universal_newlines instead of capture_output/text (3.7+) so the
    # helper also works with older python3 interpreters.
    return subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                          universal_newlines=True, timeout=timeout)


# Dump the complete nginx configuration
try:
    dumped = run(['nginx', '-T'], timeout=15)
except Exception as e:
    print(f'SKIP:nginx -T 失败: {e}')
    sys.exit(0)
if dumped.returncode != 0:
    # Without a valid dump nothing can be concluded about the proxy config.
    reason = (dumped.stderr.strip().splitlines() or ['exit %d' % dumped.returncode])[-1]
    print(f'SKIP:nginx -T 失败: {reason}')
    sys.exit(0)

# Split the `nginx -T` output into per-file sections
file_contents = {}
cur_file = None
cur_lines = []
for line in dumped.stdout.splitlines():
    if line.startswith(HEADER):
        if cur_file:
            file_contents[cur_file] = '\n'.join(cur_lines)
        # Slicing instead of str.removeprefix, which needs Python 3.9+.
        cur_file = line[len(HEADER):].rstrip(':')
        cur_lines = []
    else:
        cur_lines.append(line)
if cur_file:
    file_contents[cur_file] = '\n'.join(cur_lines)

# proxy_pass directives that point at the container nginx on this host
proxy_re = re.compile(
    r'proxy_pass\s+https?://(?:127\.0\.0\.1|localhost):' + re.escape(port) + r'\s*;',
    re.IGNORECASE
)

WS_INJECT = (
    '\n proxy_http_version 1.1;'
    '\n proxy_set_header Upgrade $http_upgrade;'
    '\n proxy_set_header Connection "upgrade";'
    '\n proxy_read_timeout 3600s;'
)


def has_ws_headers(text):
    """True when text carries all three WebSocket upgrade directives."""
    return bool(re.search(r'proxy_http_version\s+1\.1', text)
                and re.search(r'proxy_set_header\s+Upgrade', text, re.IGNORECASE)
                and re.search(r'proxy_set_header\s+Connection.*upgrade', text, re.IGNORECASE))


def find_location_blocks(text):
    """Return (start, end) spans of every `location { ... }` block, nested ones included."""
    results = []
    i = 0
    while i < len(text):
        m = re.search(r'\blocation\b[^{]*\{', text[i:])
        if not m:
            break
        abs_start = i + m.start()
        depth = 1
        j = i + m.end()
        while j < len(text) and depth:
            if text[j] == '{':
                depth += 1
            elif text[j] == '}':
                depth -= 1
            j += 1
        results.append((abs_start, j))
        # Continue right after the opening brace so nested blocks are found too.
        i = i + m.end()
    return results


def inject_ws_headers(text):
    """Add the WebSocket directives after the matching proxy_pass of every
    location block that proxies to the container nginx and lacks them."""
    while True:
        for start, end in find_location_blocks(text):
            block = text[start:end]
            if not proxy_re.search(block) or has_ws_headers(block):
                continue
            patched = proxy_re.sub(lambda m: m.group(0) + WS_INJECT, block, count=1)
            text = text[:start] + patched + text[end:]
            # Every span computed before the edit is stale now (nested blocks
            # make simple offset bookkeeping wrong), so rescan from scratch.
            break
        else:
            return text


files_fixed = []
files_checked = []
warnings = []

for fpath, content in file_contents.items():
    if not os.path.isfile(fpath) or not proxy_re.search(content):
        continue
    files_checked.append(fpath)

    # surrogateescape round-trips bytes that are not valid UTF-8.
    try:
        with open(fpath, encoding='utf-8', errors='surrogateescape') as f:
            original = f.read()
    except OSError as e:
        warnings.append(f'读取 {fpath} 失败: {e}')
        continue

    patched = inject_ws_headers(original)
    if patched == original:
        continue

    # NOTE(review): the backup sits next to the original; a wildcard include
    # such as `include sites-enabled/*;` would load it as well — confirm.
    backup = fpath + '.xuqm.bak'
    try:
        shutil.copy2(fpath, backup)
    except OSError as e:
        # No fresh backup means no safe rollback: leave the file untouched.
        warnings.append(f'备份 {fpath} 失败: {e}')
        continue

    try:
        with open(fpath, 'w', encoding='utf-8', errors='surrogateescape') as f:
            f.write(patched)
    except Exception as e:
        shutil.copy2(backup, fpath)
        warnings.append(f'写入 {fpath} 失败: {e}')
        continue

    # Validate the syntax; roll back when nginx rejects the result
    check = run(['nginx', '-t'])
    if check.returncode != 0:
        shutil.copy2(backup, fpath)
        warnings.append(f'nginx -t 失败,已回滚 {fpath}')
        warnings.extend(check.stderr.strip().splitlines())
    else:
        files_fixed.append(fpath)

for w in warnings:
    print('WARN:' + w)

if files_fixed:
    print('FIXED:' + ','.join(files_fixed))
elif warnings:
    print('FAIL:自动修复未成功')
elif files_checked:
    print('OK:WebSocket 头已存在')
else:
    print('NONE:未发现代理到容器 nginx 的配置')
PYEOF
)" || _nginx_result="SKIP:宿主机 nginx 检查脚本异常退出"

  # Relay the warnings first, then dispatch on the final status line only, so
  # a warning about one file can never hide a FIXED result for another.
  while IFS= read -r _line; do
    case "$_line" in
      WARN:*) warn "${_line#WARN:}" ;;
    esac
  done <<<"$_nginx_result"
  _nginx_status="${_nginx_result##*$'\n'}"

  case "${_nginx_status%%:*}" in
    FIXED)
      ok "宿主机 nginx 已自动补全 WebSocket 代理头"
      IFS=',' read -ra _fixed_files <<<"${_nginx_status#FIXED:}"
      for _f in "${_fixed_files[@]}"; do
        info "已修改: ${_f}(原文件备份为 ${_f}.xuqm.bak)"
      done
      info "正在重载宿主机 nginx ..."
      if nginx -s reload 2>/dev/null; then
        ok "nginx reload 成功"
      else
        warn "nginx reload 失败,请手动执行: nginx -s reload"
      fi
      ;;
    OK)
      ok "宿主机 nginx WebSocket 头已存在,无需修改"
      ;;
    NONE)
      info "未发现代理到容器 nginx(端口 ${_NGINX_PORT})的配置,如有上层代理请手动确认:"
      printf '\n'
      printf ' location / {\n'
      printf ' proxy_pass http://<本机IP>:%s;\n' "$_NGINX_PORT"
      printf ' proxy_http_version 1.1;\n'
      printf ' proxy_set_header Upgrade $http_upgrade;\n'
      printf ' proxy_set_header Connection "upgrade";\n'
      printf ' proxy_set_header Host $host;\n'
      printf ' proxy_set_header X-Real-IP $remote_addr;\n'
      printf ' proxy_set_header X-Forwarded-Proto $scheme;\n'
      printf ' proxy_read_timeout 3600s;\n'
      printf ' }\n\n'
      ;;
    SKIP)
      warn "宿主机 nginx 检查跳过: ${_nginx_status#SKIP:}"
      ;;
    FAIL)
      warn "宿主机 nginx 自动修复未成功,请手动核查 WebSocket 头配置"
      ;;
    *)
      warn "宿主机 nginx 检查返回未知结果,请手动核查 WebSocket 头配置"
      ;;
  esac
fi

# ---------------------------------------------------------------------------
# Step 5 — optionally pull the latest images
# ---------------------------------------------------------------------------
log "拉取最新镜像(可选)"

# Compose invocation shared by steps 5 and 6. An array keeps every path a
# single argument even when ROOT_DIR contains whitespace.
_COMPOSE=(
  docker compose
  --env-file "$ROOT_DIR/.env"
  -f "$ROOT_DIR/docker-compose.yml"
  -f "$ROOT_DIR/docker-compose.infra.yml"
)

_pull_choice=""
# `|| true`: EOF on stdin (non-interactive run) counts as "no".
read -rp " 是否拉取最新镜像?(y/N): " _pull_choice || true
if [[ "$_pull_choice" == [yY] ]]; then
  _REGISTRY_HOST="${REGISTRY_HOST:-}"
  _REGISTRY_USER="${REGISTRY_USER:-}"
  _REGISTRY_PASSWORD="${REGISTRY_PASSWORD:-}"
  if [ -n "$_REGISTRY_PASSWORD" ] && [ -n "$_REGISTRY_HOST" ]; then
    # The password goes through stdin so it never shows up in the process list.
    if printf '%s' "$_REGISTRY_PASSWORD" | \
      docker login "$_REGISTRY_HOST" -u "$_REGISTRY_USER" --password-stdin 2>/dev/null; then
      ok "镜像仓库登录成功"
    else
      warn "镜像仓库登录失败,将尝试使用本地缓存"
    fi
  fi
  if "${_COMPOSE[@]}" pull --ignore-pull-failures 2>/dev/null; then
    ok "镜像已更新"
  else
    warn "部分镜像拉取失败,将使用本地缓存继续"
  fi
else
  info "跳过镜像拉取"
fi

# ---------------------------------------------------------------------------
# Step 6 — restart the affected containers (up -d applies freshly pulled images)
# ---------------------------------------------------------------------------
log "重启受影响的容器"

# Reload the secrets (docker compose needs them as environment variables)
set -a
# shellcheck disable=SC1090
. "$ROOT_DIR/config/secrets.env"
set +a

# `up -d` rather than `restart`: restart only bounces the container process
# and keeps the old image, while `up -d` detects an image change and
# recreates the container — the correct way to roll out a new version.
info "应用新镜像并重启 tenant-service ..."
"${_COMPOSE[@]}" up -d --no-deps --remove-orphans tenant-service
ok "tenant-service 已更新"

info "应用新镜像并重启 nginx ..."
"${_COMPOSE[@]}" up -d --no-deps --remove-orphans nginx
ok "nginx 已更新"

# ---------------------------------------------------------------------------
# Step 7 — wait for tenant-service to become healthy
# ---------------------------------------------------------------------------
printf ' 等待 tenant-service 就绪'
_healthy=0
# Up to 40 probes, 3 s apart; each probe is capped at 4 s by --max-time.
for ((_attempt = 1; _attempt <= 40; _attempt++)); do
  _code="$(curl -skL --noproxy '*' -o /dev/null -w '%{http_code}' --max-time 4 \
    "http://127.0.0.1:11224/actuator/health" 2>/dev/null || echo 000)"
  if [ "$_code" = "200" ]; then
    printf '\n'
    ok "tenant-service 健康 (HTTP 200)"
    _healthy=1
    break
  fi
  printf '.'
  sleep 3
done
if [ "$_healthy" -eq 0 ]; then
  printf '\n'
  warn "tenant-service 在 120s 内未响应,请检查: docker compose logs --tail 60 tenant-service"
fi

# ---------------------------------------------------------------------------
# Step 8 — auto-approve backlogged PENDING service-activation requests
# ---------------------------------------------------------------------------
log "处理积压的服务开通申请"
if [ "$_healthy" -eq 1 ]; then
  _approve_resp="$(curl -sk --noproxy '*' -X POST \
    "http://127.0.0.1:11224/api/private/admin/approve-pending-requests" \
    --max-time 10 2>/dev/null || true)"
  # Reads data.approved from the JSON reply; null or missing fields count as 0
  # and any parse error falls back to 0.
  # NOTE(review): a failed API call is therefore reported as "no backlog".
  _approved="$(printf '%s' "$_approve_resp" | python3 -c \
    'import sys, json; d = json.load(sys.stdin); print(int((d.get("data") or {}).get("approved") or 0))' \
    2>/dev/null || echo 0)"
  # Guard the numeric test below against anything that is not a plain integer.
  [[ "$_approved" =~ ^[0-9]+$ ]] || _approved=0
  if [ "$_approved" -gt 0 ]; then
    ok "已自动开通 ${_approved} 条积压申请"
  else
    ok "无积压申请"
  fi
else
  warn "tenant-service 未健康,跳过积压申请处理"
fi

# ---------------------------------------------------------------------------
# Step 9 — WebSocket connectivity self-check
# ---------------------------------------------------------------------------
log "检查 WebSocket 连通性"

# Probe the /ws/im upgrade handshake of the container nginx from this host.
# Expected: HTTP 101 Switching Protocols; failures show up as 400, a refused
# connection, etc.
_ws_probe="$(curl -sk --noproxy '*' --max-time 5 \
  -o /dev/null -w '%{http_code}' \
  -H "Upgrade: websocket" \
  -H "Connection: Upgrade" \
  -H "Sec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==" \
  -H "Sec-WebSocket-Version: 13" \
  "http://127.0.0.1:${_NGINX_PORT}/ws/im" 2>/dev/null || echo 000)"

# curl does not speak the WebSocket protocol, so after a 101 it ends with a
# non-zero status and the `|| echo 000` fallback is appended ("101000").
# Only the first three characters are the real HTTP status.
_ws_probe="${_ws_probe:0:3}"

if [ "$_ws_probe" = "101" ]; then
  ok "容器 nginx → im-service WebSocket 握手正常 (HTTP 101)"

  # Probe the public address as well (requires the upstream TLS proxy to
  # pass the Upgrade header through).
  if [ -n "$_CONSOLE_DOMAIN" ]; then
    _ext_ws_url="$(printf '%s' "$_SDK_IM_WS_URL" | sed 's|^wss://|https://|; s|^ws://|http://|')"
    _ext_code="$(curl -sk --max-time 8 \
      -o /dev/null -w '%{http_code}' \
      -H "Upgrade: websocket" \
      -H "Connection: Upgrade" \
      -H "Sec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==" \
      -H "Sec-WebSocket-Version: 13" \
      "${_ext_ws_url}" 2>/dev/null || echo 000)"
    _ext_code="${_ext_code:0:3}"
    if [ "$_ext_code" = "101" ]; then
      ok "外部域名 WebSocket 握手正常 (HTTP 101) → ${_SDK_IM_WS_URL}"
    else
      warn "外部域名 WebSocket 握手失败 (HTTP ${_ext_code}) → ${_SDK_IM_WS_URL}"
      printf '\n'
      printf ' 上层代理(云 SLB / CDN / 其他服务器 nginx)需在转发规则中加入:\n\n'
      printf ' proxy_http_version 1.1;\n'
      printf ' proxy_set_header Upgrade $http_upgrade;\n'
      printf ' proxy_set_header Connection "upgrade";\n'
      printf ' proxy_read_timeout 3600s;\n\n'
      printf ' 云负载均衡(SLB/ALB):在监听器配置中开启 WebSocket 支持。\n'
      printf ' Nginx:在 location / 块中补齐以上四行,nginx -s reload 生效。\n\n'
    fi
  fi
else
  warn "容器 nginx WebSocket 握手异常 (HTTP ${_ws_probe}),请检查 im-service 是否正常运行"
  info "排查命令: docker compose logs --tail 50 im-service"
fi

# ---------------------------------------------------------------------------
# Step 10 — full verification
# ---------------------------------------------------------------------------
log "运行全量验证"
# verify.sh receives the local container-nginx address through BASE_URL.
if BASE_URL="http://127.0.0.1:${_NGINX_PORT}" bash "$ROOT_DIR/scripts/verify.sh"; then
  ok "全量验证通过"
else
  warn "部分验证项未通过,请查看上方输出"
fi

# ---------------------------------------------------------------------------
# Done
# ---------------------------------------------------------------------------
printf "\n${BOLD}══════════════════════════════════════════════════${RESET}\n"
printf "${BOLD} 热更新完成${RESET}\n"
printf "${BOLD}══════════════════════════════════════════════════${RESET}\n"
printf "\n 访问地址:${BOLD}%s${RESET}\n" "$_CONSOLE_DOMAIN"
printf " IM WS :${BOLD}%s${RESET}\n\n" "$_SDK_IM_WS_URL"