diff --git a/tenant-service/src/main/java/com/xuqm/tenant/service/SystemUpdateService.java b/tenant-service/src/main/java/com/xuqm/tenant/service/SystemUpdateService.java index 29efdf8..1a9cefc 100644 --- a/tenant-service/src/main/java/com/xuqm/tenant/service/SystemUpdateService.java +++ b/tenant-service/src/main/java/com/xuqm/tenant/service/SystemUpdateService.java @@ -799,8 +799,10 @@ public class SystemUpdateService { emit.accept(" restarting " + svc + " ..."); exec(emit, "docker", "compose", "-f", composeFile, "up", "-d", "--no-deps", "--force-recreate", svc); + // Grab the ID of the container that compose up has just created, so the old container cannot skew the check + String newContainerId = getNewestContainerId(svc); - boolean healthy = waitForServiceStable(emit, svc, HEALTH_CHECK_TIMEOUT_SEC); + boolean healthy = waitForServiceStable(emit, svc, newContainerId, HEALTH_CHECK_TIMEOUT_SEC); if (healthy) { emit.accept(" " + svc + " ✓"); } else { @@ -892,12 +894,34 @@ public class SystemUpdateService { } /** - * 轮询容器状态,直到容器持续 HEALTH_STABLE_REQUIRED_SEC 秒保持 running。 - * 若检测到容器已 exited,立即返回 false(快速失败)。 + * Fetch the ID of the most recently created container of the given service (stopped containers included); yields null when docker exits non-zero or prints anything other than a single hex container ID, because stderr is merged into the captured output. + * Call it right after docker compose up --force-recreate so that the new container, not the old one, is picked up. NOTE(review): the label filter is not scoped to a compose project; confirm no other project runs a service of the same name. */ - private boolean waitForServiceStable(Consumer emit, String service, int timeoutSeconds) { - int elapsed = 0; - int stableSeconds = 0; + private String getNewestContainerId(String service) { + try { + Process p = new ProcessBuilder( + "docker", "ps", "-a", "-n", "1", + "--filter", "label=com.docker.compose.service=" + service, + "--format", "{{.ID}}" + ).redirectErrorStream(true).start(); + String out = new String(p.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim(); + int exitCode = p.waitFor(); + return (exitCode == 0 && out.matches("[0-9a-f]{12,64}")) ? 
out : null; + } catch (Exception e) { + return null; + } + } + + /** + * Poll the given container until it has stayed running for HEALTH_STABLE_REQUIRED_SEC consecutive seconds. + * The containerId pins the check to the new container, so the old one being stopped by --force-recreate is not misread as a failure. + * Returns false at once when the container has already exited (fail fast). + * + * @param containerId ID of the new container; when null, falls back to polling by service name + */ + private boolean waitForServiceStable(Consumer emit, String service, String containerId, int timeoutSeconds) { + int elapsed = 0; + int stableSeconds = 0; while (elapsed < timeoutSeconds) { try { Thread.sleep(HEALTH_CHECK_INTERVAL_SEC * 1000L); } @@ -906,39 +930,43 @@ public class SystemUpdateService { stableSeconds += HEALTH_CHECK_INTERVAL_SEC; try { - // 检查 running 状态 - Process runPs = new ProcessBuilder( - "docker", "ps", - "--filter", "label=com.docker.compose.service=" + service, - "--filter", "status=running", - "--format", "{{.ID}}" - ).redirectErrorStream(true).start(); - String running = new String(runPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim(); - runPs.waitFor(); - - if (running.isEmpty()) { - // 检查是否已 exited(快速失败) - Process exitPs = new ProcessBuilder( - "docker", "ps", "-a", - "--filter", "label=com.docker.compose.service=" + service, - "--filter", "status=exited", - "--format", "{{.Status}}" + String statusLine; + if (containerId != null) { + // Inspect the new container directly so the old container cannot interfere + Process ins = new ProcessBuilder( + "docker", "inspect", "--format", + "{{.State.Status}} {{.State.ExitCode}}", containerId ).redirectErrorStream(true).start(); - String exited = new String(exitPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim(); - exitPs.waitFor(); - - if (!exited.isEmpty()) { - emit.accept(" [健康检查] " + service + " 已退出 (" + exited + "),快速判定失败"); - return false; - } - stableSeconds = 0; - emit.accept(" [健康检查] " + service + " 等待启动... 
(" + elapsed + "/" + timeoutSeconds + "s)"); + statusLine = new String(ins.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim(); + ins.waitFor(); } else { + // Fallback mode: look for a running container of the service + Process runPs = new ProcessBuilder( + "docker", "ps", + "--filter", "label=com.docker.compose.service=" + service, + "--filter", "status=running", + "--format", "{{.ID}}" + ).redirectErrorStream(true).start(); + String runOut = new String(runPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim(); + runPs.waitFor(); + statusLine = runOut.isEmpty() ? "unknown 0" : "running 0"; + } + + if (statusLine.startsWith("running")) { emit.accept(" [健康检查] " + service + " running (" + stableSeconds + "/" + HEALTH_STABLE_REQUIRED_SEC + "s)"); if (stableSeconds >= HEALTH_STABLE_REQUIRED_SEC) { return true; } + } else if (statusLine.startsWith("exited")) { + // Fail fast: the new container has already exited + emit.accept(" [健康检查] " + service + " 已退出 (" + statusLine + "),快速判定失败"); + return false; + } else { + // Intermediate states such as created / paused / restarting + stableSeconds = 0; + emit.accept(" [健康检查] " + service + " 等待启动... status=" + statusLine + + " (" + elapsed + "/" + timeoutSeconds + "s)"); } } catch (Exception e) { stableSeconds = 0; @@ -971,8 +999,9 @@ public class SystemUpdateService { } exec(emit, "docker", "compose", "-f", composeFile, "up", "-d", "--no-deps", "--force-recreate", service); + String rollbackContainerId = getNewestContainerId(service); - boolean ok = waitForServiceStable(emit, service, 60); + boolean ok = waitForServiceStable(emit, service, rollbackContainerId, 60); if (ok) { emit.accept(" [回滚] " + service + " 已回滚到旧版本 ✓"); } else {