fix(安全中心): 修复健康检查误判 force-recreate 旧容器为失败

force-recreate 会先停掉旧容器(status=exited),若此时健康检查
轮询到旧容器的 exited 状态,会误判新容器失败并触发不必要的回滚。

修复方式:
- 新增 getNewestContainerId() 在 compose up 后立即拿到新容器 ID
- waitForServiceStable 接受 containerId 参数,通过 docker inspect
  精确轮询新容器状态,完全隔离旧容器的干扰
- 退化路径(containerId=null)保留原有服务名轮询逻辑

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
这个提交包含在:
XuqmGroup 2026-06-13 01:18:01 +08:00
父节点 9a9524ac07
当前提交 9084831b2a

查看文件

@ -799,8 +799,10 @@ public class SystemUpdateService {
emit.accept(" restarting " + svc + " ..."); emit.accept(" restarting " + svc + " ...");
exec(emit, "docker", "compose", "-f", composeFile, exec(emit, "docker", "compose", "-f", composeFile,
"up", "-d", "--no-deps", "--force-recreate", svc); "up", "-d", "--no-deps", "--force-recreate", svc);
// 拿到 compose up 之后最新创建的容器 ID,排除旧容器干扰
String newContainerId = getNewestContainerId(svc);
boolean healthy = waitForServiceStable(emit, svc, HEALTH_CHECK_TIMEOUT_SEC); boolean healthy = waitForServiceStable(emit, svc, newContainerId, HEALTH_CHECK_TIMEOUT_SEC);
if (healthy) { if (healthy) {
emit.accept(" " + svc + ""); emit.accept(" " + svc + "");
} else { } else {
@ -892,12 +894,34 @@ public class SystemUpdateService {
} }
/** /**
* 轮询容器状态直到容器持续 HEALTH_STABLE_REQUIRED_SEC 秒保持 running * 获取指定服务最新创建的容器 ID含已停止容器
* 若检测到容器已 exited立即返回 false快速失败 * docker compose up --force-recreate 之后立即调用确保拿到新容器而非旧容器
*/ */
private boolean waitForServiceStable(Consumer<String> emit, String service, int timeoutSeconds) { private String getNewestContainerId(String service) {
int elapsed = 0; try {
int stableSeconds = 0; Process p = new ProcessBuilder(
"docker", "ps", "-a", "-n", "1",
"--filter", "label=com.docker.compose.service=" + service,
"--format", "{{.ID}}"
).redirectErrorStream(true).start();
String out = new String(p.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
p.waitFor();
return out.isEmpty() ? null : out.split("\n")[0].trim();
} catch (Exception e) {
return null;
}
}
/**
* 轮询指定容器的状态,直到该容器持续 HEALTH_STABLE_REQUIRED_SEC 秒保持 running。
* 通过 containerId 精确定位新容器,避免 --force-recreate 停掉旧容器时的误判。
* 若容器已 exited,立即返回 false(快速失败)。
*
* @param containerId 新容器 ID;为 null 时退化为服务名轮询
*/
private boolean waitForServiceStable(Consumer<String> emit, String service, String containerId, int timeoutSeconds) {
int elapsed = 0;
int stableSeconds = 0;
while (elapsed < timeoutSeconds) { while (elapsed < timeoutSeconds) {
try { Thread.sleep(HEALTH_CHECK_INTERVAL_SEC * 1000L); } try { Thread.sleep(HEALTH_CHECK_INTERVAL_SEC * 1000L); }
@ -906,39 +930,43 @@ public class SystemUpdateService {
stableSeconds += HEALTH_CHECK_INTERVAL_SEC; stableSeconds += HEALTH_CHECK_INTERVAL_SEC;
try { try {
// 检查 running 状态 String statusLine;
Process runPs = new ProcessBuilder( if (containerId != null) {
"docker", "ps", // 直接 inspect 新容器避免旧容器干扰
"--filter", "label=com.docker.compose.service=" + service, Process ins = new ProcessBuilder(
"--filter", "status=running", "docker", "inspect", "--format",
"--format", "{{.ID}}" "{{.State.Status}} {{.State.ExitCode}}", containerId
).redirectErrorStream(true).start();
String running = new String(runPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
runPs.waitFor();
if (running.isEmpty()) {
// 检查是否已 exited快速失败
Process exitPs = new ProcessBuilder(
"docker", "ps", "-a",
"--filter", "label=com.docker.compose.service=" + service,
"--filter", "status=exited",
"--format", "{{.Status}}"
).redirectErrorStream(true).start(); ).redirectErrorStream(true).start();
String exited = new String(exitPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim(); statusLine = new String(ins.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
exitPs.waitFor(); ins.waitFor();
if (!exited.isEmpty()) {
emit.accept(" [健康检查] " + service + " 已退出 (" + exited + "),快速判定失败");
return false;
}
stableSeconds = 0;
emit.accept(" [健康检查] " + service + " 等待启动... (" + elapsed + "/" + timeoutSeconds + "s)");
} else { } else {
// 退化模式 running 容器
Process runPs = new ProcessBuilder(
"docker", "ps",
"--filter", "label=com.docker.compose.service=" + service,
"--filter", "status=running",
"--format", "{{.ID}}"
).redirectErrorStream(true).start();
String runOut = new String(runPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
runPs.waitFor();
statusLine = runOut.isEmpty() ? "unknown 0" : "running 0";
}
if (statusLine.startsWith("running")) {
emit.accept(" [健康检查] " + service + " running (" emit.accept(" [健康检查] " + service + " running ("
+ stableSeconds + "/" + HEALTH_STABLE_REQUIRED_SEC + "s)"); + stableSeconds + "/" + HEALTH_STABLE_REQUIRED_SEC + "s)");
if (stableSeconds >= HEALTH_STABLE_REQUIRED_SEC) { if (stableSeconds >= HEALTH_STABLE_REQUIRED_SEC) {
return true; return true;
} }
} else if (statusLine.startsWith("exited")) {
// 快速失败:新容器已退出
emit.accept(" [健康检查] " + service + " 已退出 (" + statusLine + "),快速判定失败");
return false;
} else {
// created / paused / restarting 等中间状态
stableSeconds = 0;
emit.accept(" [健康检查] " + service + " 等待启动... status=" + statusLine
+ " (" + elapsed + "/" + timeoutSeconds + "s)");
} }
} catch (Exception e) { } catch (Exception e) {
stableSeconds = 0; stableSeconds = 0;
@ -971,8 +999,9 @@ public class SystemUpdateService {
} }
exec(emit, "docker", "compose", "-f", composeFile, "up", "-d", "--no-deps", "--force-recreate", service); exec(emit, "docker", "compose", "-f", composeFile, "up", "-d", "--no-deps", "--force-recreate", service);
String rollbackContainerId = getNewestContainerId(service);
boolean ok = waitForServiceStable(emit, service, 60); boolean ok = waitForServiceStable(emit, service, rollbackContainerId, 60);
if (ok) { if (ok) {
emit.accept(" [回滚] " + service + " 已回滚到旧版本 ✓"); emit.accept(" [回滚] " + service + " 已回滚到旧版本 ✓");
} else { } else {