fix(安全中心): 修复健康检查误判 force-recreate 旧容器为失败
force-recreate 会先停掉旧容器(status=exited),若此时健康检查轮询到旧容器的 exited 状态,会误判新容器失败并触发不必要的回滚。

修复方式:
- 新增 getNewestContainerId(),在 compose up 后立即拿到新容器 ID
- waitForServiceStable 接受 containerId 参数,通过 docker inspect 精确轮询新容器状态,完全隔离旧容器的干扰
- 退化路径(containerId=null)保留原有服务名轮询逻辑

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
这个提交包含在:
父节点
9a9524ac07
当前提交
9084831b2a
@ -799,8 +799,10 @@ public class SystemUpdateService {
|
|||||||
emit.accept(" restarting " + svc + " ...");
|
emit.accept(" restarting " + svc + " ...");
|
||||||
exec(emit, "docker", "compose", "-f", composeFile,
|
exec(emit, "docker", "compose", "-f", composeFile,
|
||||||
"up", "-d", "--no-deps", "--force-recreate", svc);
|
"up", "-d", "--no-deps", "--force-recreate", svc);
|
||||||
|
// 拿到 compose up 之后最新创建的容器 ID,排除旧容器干扰
|
||||||
|
String newContainerId = getNewestContainerId(svc);
|
||||||
|
|
||||||
boolean healthy = waitForServiceStable(emit, svc, HEALTH_CHECK_TIMEOUT_SEC);
|
boolean healthy = waitForServiceStable(emit, svc, newContainerId, HEALTH_CHECK_TIMEOUT_SEC);
|
||||||
if (healthy) {
|
if (healthy) {
|
||||||
emit.accept(" " + svc + " ✓");
|
emit.accept(" " + svc + " ✓");
|
||||||
} else {
|
} else {
|
||||||
@ -892,10 +894,32 @@ public class SystemUpdateService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 轮询容器状态,直到容器持续 HEALTH_STABLE_REQUIRED_SEC 秒保持 running。
|
* 获取指定服务最新创建的容器 ID(含已停止容器)。
|
||||||
* 若检测到容器已 exited,立即返回 false(快速失败)。
|
* 在 docker compose up --force-recreate 之后立即调用,确保拿到新容器而非旧容器。
|
||||||
*/
|
*/
|
||||||
private boolean waitForServiceStable(Consumer<String> emit, String service, int timeoutSeconds) {
|
private String getNewestContainerId(String service) {
|
||||||
|
try {
|
||||||
|
Process p = new ProcessBuilder(
|
||||||
|
"docker", "ps", "-a", "-n", "1",
|
||||||
|
"--filter", "label=com.docker.compose.service=" + service,
|
||||||
|
"--format", "{{.ID}}"
|
||||||
|
).redirectErrorStream(true).start();
|
||||||
|
String out = new String(p.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
|
||||||
|
p.waitFor();
|
||||||
|
return out.isEmpty() ? null : out.split("\n")[0].trim();
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 轮询指定容器的状态,直到该容器持续 HEALTH_STABLE_REQUIRED_SEC 秒保持 running。
|
||||||
|
* 通过 containerId 精确定位新容器,避免 --force-recreate 停掉旧容器时的误判。
|
||||||
|
* 若容器已 exited,立即返回 false(快速失败)。
|
||||||
|
*
|
||||||
|
* @param containerId 新容器 ID;为 null 时退化为服务名轮询
|
||||||
|
*/
|
||||||
|
private boolean waitForServiceStable(Consumer<String> emit, String service, String containerId, int timeoutSeconds) {
|
||||||
int elapsed = 0;
|
int elapsed = 0;
|
||||||
int stableSeconds = 0;
|
int stableSeconds = 0;
|
||||||
|
|
||||||
@ -906,39 +930,43 @@ public class SystemUpdateService {
|
|||||||
stableSeconds += HEALTH_CHECK_INTERVAL_SEC;
|
stableSeconds += HEALTH_CHECK_INTERVAL_SEC;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// 检查 running 状态
|
String statusLine;
|
||||||
|
if (containerId != null) {
|
||||||
|
// 直接 inspect 新容器,避免旧容器干扰
|
||||||
|
Process ins = new ProcessBuilder(
|
||||||
|
"docker", "inspect", "--format",
|
||||||
|
"{{.State.Status}} {{.State.ExitCode}}", containerId
|
||||||
|
).redirectErrorStream(true).start();
|
||||||
|
statusLine = new String(ins.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
|
||||||
|
ins.waitFor();
|
||||||
|
} else {
|
||||||
|
// 退化模式:查 running 容器
|
||||||
Process runPs = new ProcessBuilder(
|
Process runPs = new ProcessBuilder(
|
||||||
"docker", "ps",
|
"docker", "ps",
|
||||||
"--filter", "label=com.docker.compose.service=" + service,
|
"--filter", "label=com.docker.compose.service=" + service,
|
||||||
"--filter", "status=running",
|
"--filter", "status=running",
|
||||||
"--format", "{{.ID}}"
|
"--format", "{{.ID}}"
|
||||||
).redirectErrorStream(true).start();
|
).redirectErrorStream(true).start();
|
||||||
String running = new String(runPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
|
String runOut = new String(runPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
|
||||||
runPs.waitFor();
|
runPs.waitFor();
|
||||||
|
statusLine = runOut.isEmpty() ? "unknown 0" : "running 0";
|
||||||
if (running.isEmpty()) {
|
|
||||||
// 检查是否已 exited(快速失败)
|
|
||||||
Process exitPs = new ProcessBuilder(
|
|
||||||
"docker", "ps", "-a",
|
|
||||||
"--filter", "label=com.docker.compose.service=" + service,
|
|
||||||
"--filter", "status=exited",
|
|
||||||
"--format", "{{.Status}}"
|
|
||||||
).redirectErrorStream(true).start();
|
|
||||||
String exited = new String(exitPs.getInputStream().readAllBytes(), StandardCharsets.UTF_8).trim();
|
|
||||||
exitPs.waitFor();
|
|
||||||
|
|
||||||
if (!exited.isEmpty()) {
|
|
||||||
emit.accept(" [健康检查] " + service + " 已退出 (" + exited + "),快速判定失败");
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
stableSeconds = 0;
|
|
||||||
emit.accept(" [健康检查] " + service + " 等待启动... (" + elapsed + "/" + timeoutSeconds + "s)");
|
if (statusLine.startsWith("running")) {
|
||||||
} else {
|
|
||||||
emit.accept(" [健康检查] " + service + " running ("
|
emit.accept(" [健康检查] " + service + " running ("
|
||||||
+ stableSeconds + "/" + HEALTH_STABLE_REQUIRED_SEC + "s)");
|
+ stableSeconds + "/" + HEALTH_STABLE_REQUIRED_SEC + "s)");
|
||||||
if (stableSeconds >= HEALTH_STABLE_REQUIRED_SEC) {
|
if (stableSeconds >= HEALTH_STABLE_REQUIRED_SEC) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
} else if (statusLine.startsWith("exited")) {
|
||||||
|
// 快速失败:新容器已退出
|
||||||
|
emit.accept(" [健康检查] " + service + " 已退出 (" + statusLine + "),快速判定失败");
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
// created / paused / restarting 等中间状态
|
||||||
|
stableSeconds = 0;
|
||||||
|
emit.accept(" [健康检查] " + service + " 等待启动... status=" + statusLine
|
||||||
|
+ " (" + elapsed + "/" + timeoutSeconds + "s)");
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
stableSeconds = 0;
|
stableSeconds = 0;
|
||||||
@ -971,8 +999,9 @@ public class SystemUpdateService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
exec(emit, "docker", "compose", "-f", composeFile, "up", "-d", "--no-deps", "--force-recreate", service);
|
exec(emit, "docker", "compose", "-f", composeFile, "up", "-d", "--no-deps", "--force-recreate", service);
|
||||||
|
String rollbackContainerId = getNewestContainerId(service);
|
||||||
|
|
||||||
boolean ok = waitForServiceStable(emit, service, 60);
|
boolean ok = waitForServiceStable(emit, service, rollbackContainerId, 60);
|
||||||
if (ok) {
|
if (ok) {
|
||||||
emit.accept(" [回滚] " + service + " 已回滚到旧版本 ✓");
|
emit.accept(" [回滚] " + service + " 已回滚到旧版本 ✓");
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
正在加载...
在新工单中引用
屏蔽一个用户