112 lines
4.2 KiB
Diff
From 6933b605d03ae7e8166bbb9826dd1eb914a9742e Mon Sep 17 00:00:00 2001
|
|
From: zhongtao <zhongtao17@huawei.com>
|
|
Date: Wed, 26 Jul 2023 17:07:15 +1400
|
|
Subject: [PATCH 01/11] fix stuck health check blocking container stop bugs
|
|
|
|
Signed-off-by: zhongtao <zhongtao17@huawei.com>
|
|
---
|
|
src/daemon/modules/api/container_api.h | 1 +
|
|
.../container/health_check/health_check.c | 43 ++++++++++++++++---
|
|
2 files changed, 38 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/src/daemon/modules/api/container_api.h b/src/daemon/modules/api/container_api.h
|
|
index ed97633f..4c1dd29a 100644
|
|
--- a/src/daemon/modules/api/container_api.h
|
|
+++ b/src/daemon/modules/api/container_api.h
|
|
@@ -51,6 +51,7 @@ typedef struct health_check_manager {
|
|
health_check_monitor_status_t monitor_status;
|
|
// Used to wait for the health check minotor thread to close
|
|
bool monitor_exist;
|
|
+ pthread_t monitor_tid;
|
|
} health_check_manager_t;
|
|
|
|
typedef struct _container_state_t_ {
|
|
diff --git a/src/daemon/modules/container/health_check/health_check.c b/src/daemon/modules/container/health_check/health_check.c
|
|
index e9dcbdb9..bd75382f 100644
|
|
--- a/src/daemon/modules/container/health_check/health_check.c
|
|
+++ b/src/daemon/modules/container/health_check/health_check.c
|
|
@@ -169,15 +169,49 @@ static bool get_monitor_exist_flag(health_check_manager_t *health)
|
|
|
|
static void close_health_check_monitor(container_t *cont)
|
|
{
|
|
+ int64_t timeout = 0;
|
|
+ /* wait up to 1 second (2000 * 500 µs) for the monitor thread to exit by itself before cancelling it */
|
|
+ int64_t retries = 2000;
|
|
+ int ret = -1;
|
|
+
|
|
if (cont == NULL || cont->health_check == NULL) {
|
|
return;
|
|
}
|
|
+ pthread_t monitor_tid = cont->health_check->monitor_tid;
|
|
|
|
set_monitor_stop_status(cont->health_check);
|
|
// ensure that the monitor process exits
|
|
while (get_monitor_exist_flag(cont->health_check)) {
|
|
util_usleep_nointerupt(500);
|
|
+ timeout += 1;
|
|
+ if (timeout <= retries) {
|
|
+ continue;
|
|
+ }
|
|
+ if (monitor_tid <= 0) {
|
|
+ break;
|
|
+ }
|
|
+ DEBUG("Try to cancel monitor thread");
|
|
+ ret = pthread_cancel(monitor_tid);
|
|
+ if (ret != 0 && ret != ESRCH) {
|
|
+ WARN("Failed to cancel monitor thread, try to kill thread");
|
|
+ pthread_kill(monitor_tid, SIGKILL);
|
|
+ }
|
|
+ break;
|
|
}
|
|
+
|
|
+ if (monitor_tid > 0 && pthread_join(monitor_tid, NULL) != 0) {
|
|
+ ERROR("Failed to join monitor thread");
|
|
+ }
|
|
+
|
|
+ // monitor_tid == 0 means no monitor thread is recorded, e.g. this call comes from the health check
|
|
+ // initialization at container start, where the old monitor is stopped before a new monitor thread is created.
|
|
+ // In that case there is no thread to join and the health status does not need to be set.
|
|
+ if (monitor_tid > 0) {
|
|
+ set_health_status(cont, UNHEALTHY);
|
|
+ set_monitor_exist_flag(cont->health_check, false);
|
|
+ }
|
|
+
|
|
+ cont->health_check->monitor_tid = 0;
|
|
}
|
|
|
|
// Called when the container is being stopped (whether because the health check is
|
|
@@ -228,6 +262,8 @@ static health_check_manager_t *health_check_manager_new()
|
|
|
|
health_check->monitor_exist = false;
|
|
|
|
+ health_check->monitor_tid = 0;
|
|
+
|
|
return health_check;
|
|
cleanup:
|
|
health_check_manager_free(health_check);
|
|
@@ -887,20 +923,15 @@ void container_update_health_monitor(const char *container_id)
|
|
|
|
want_running = container_is_running(cont->state) && !container_is_paused(cont->state) && probe != HEALTH_NONE;
|
|
if (want_running) {
|
|
- pthread_t monitor_tid = { 0 };
|
|
char *cid = util_strdup_s(container_id);
|
|
// ensured that the health check monitor process is stopped
|
|
close_health_check_monitor(cont);
|
|
init_monitor_idle_status(cont->health_check);
|
|
- if (pthread_create(&monitor_tid, NULL, health_check_monitor, (void *)cid)) {
|
|
+ if (pthread_create(&cont->health_check->monitor_tid, NULL, health_check_monitor, (void *)cid)) {
|
|
free(cid);
|
|
ERROR("Failed to create thread to monitor health check...");
|
|
goto out;
|
|
}
|
|
- if (pthread_detach(monitor_tid)) {
|
|
- ERROR("Failed to detach the health check monitor thread");
|
|
- goto out;
|
|
- }
|
|
} else {
|
|
close_health_check_monitor(cont);
|
|
}
|
|
--
|
|
2.25.1
|
|
|