150 lines
5.5 KiB
Diff
150 lines
5.5 KiB
Diff
From 38ef31939056dea30017d782988ba1a46df4784a Mon Sep 17 00:00:00 2001
|
|
From: Petr Pavlu <petr.pavlu@suse.com>
|
|
Date: Tue, 9 Feb 2021 13:13:25 +0100
|
|
Subject: [PATCH] fix(shutdown): add timeout to umount calls
|
|
|
|
When terminating a system, the shutdown module attempts to unmount all
|
|
file systems from under /oldroot. This reaps remaining file systems that
|
|
systemd cannot unmount and detaches /oldroot itself.
|
|
|
|
If running umount for some file system repeatedly fails, the
|
|
module reports this error and continues the processing in order to
|
|
shut down the system. This handles a condition when the umount command
|
|
actually terminates but it can happen in some cases that it waits
|
|
indefinitely.
|
|
|
|
An example with NFS mounts:
|
|
# mount -t nfs 192.168.0.1:/srv/nfs/dir /mnt/nfs
|
|
# mkdir /mnt/nfs/dir2
|
|
# mount -t nfs 192.168.0.1:/srv/nfs/dir2 /mnt/nfs/dir2
|
|
# touch /mnt/nfs/dir2/file
|
|
# systemd-run -pKillMode=none -pSendSIGKILL=no tail -f /mnt/nfs/dir2/file
|
|
Running as unit: run-r367825c967ca4d88a793ae4793c02f8b.service
|
|
# systemctl poweroff
|
|
|
|
The invoked tail command escapes normal termination by systemd and
|
|
prevents stopping mnt-nfs.mount and mnt-nfs-dir2.mount as it makes the
|
|
mounts busy. Systemd then again attempts to unmount these file systems
|
|
in systemd-shutdown but this fails as well. The utility tries to unmount
|
|
/mnt/nfs/dir2 but the kernel waits indefinitely doing a path lookup for
|
|
/mnt/nfs because network is no longer available at that point. The
|
|
systemd-shutdown gives up after 90 seconds. Finally, the control is
|
|
transferred to dracut which tries to unmount the file systems in the
|
|
same way and ends up indefinitely waiting on umount to finish.
|
|
|
|
This situation causes the system to hang during shutdown. The patch
|
|
improves the shutdown module to add a timeout of 90 seconds for the
|
|
umount operation and continue with the shutdown if it gets reached,
|
|
similarly to what systemd-shutdown does.
|
|
|
|
Conflict:NA
|
|
Reference:https://github.com/dracutdevs/dracut/commit/38ef31939056dea30017d782988ba1a46df4784a
|
|
---
|
|
dracut.cmdline.7.asc | 6 ++++
|
|
modules.d/99shutdown/module-setup.sh | 2 +-
|
|
modules.d/99shutdown/shutdown.sh | 44 +++++++++++++++++++++++-----
|
|
3 files changed, 43 insertions(+), 9 deletions(-)
|
|
|
|
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
|
|
index 0951c0e0..4e598b6b 100644
|
|
--- a/dracut.cmdline.7.asc
|
|
+++ b/dracut.cmdline.7.asc
|
|
@@ -167,6 +167,12 @@ Misc
|
|
specify the controlling terminal for the console.
|
|
This is useful, if you have multiple "console=" arguments.
|
|
|
|
+**rd.shutdown.timeout.umount=**__<seconds>__::
|
|
+ specify how long dracut should wait for an individual umount to finish
|
|
+ during shutdown. This prevents the system from blocking when unmounting a file
|
|
+ system cannot complete and waits indefinitely. Value '0' means to wait
|
|
+ 'forever'. The default is 90 seconds.
|
|
+
|
|
[[dracutkerneldebug]]
|
|
Debug
|
|
~~~~~
|
|
diff --git a/modules.d/99shutdown/module-setup.sh b/modules.d/99shutdown/module-setup.sh
|
|
index dfd6caa2..ff9b7734 100755
|
|
--- a/modules.d/99shutdown/module-setup.sh
|
|
+++ b/modules.d/99shutdown/module-setup.sh
|
|
@@ -14,7 +14,7 @@ depends() {
|
|
# called by dracut
|
|
install() {
|
|
local _d
|
|
- inst_multiple umount poweroff reboot halt losetup stat sleep
|
|
+ inst_multiple umount poweroff reboot halt losetup stat sleep timeout
|
|
inst_multiple -o kexec
|
|
inst "$moddir/shutdown.sh" "$prefix/shutdown"
|
|
[ -e "${initdir}/lib" ] || mkdir -m 0755 -p ${initdir}/lib
|
|
diff --git a/modules.d/99shutdown/shutdown.sh b/modules.d/99shutdown/shutdown.sh
|
|
index a30a126f..90a0b9e1 100755
|
|
--- a/modules.d/99shutdown/shutdown.sh
|
|
+++ b/modules.d/99shutdown/shutdown.sh
|
|
@@ -46,28 +46,56 @@ warn "Killing all remaining processes"
|
|
|
|
killall_proc_mountpoint /oldroot || sleep 0.2
|
|
|
|
+# Timeout for umount calls. The value can be set to 0 to wait forever.
|
|
+_umount_timeout=$(getarg rd.shutdown.timeout.umount)
|
|
+_umount_timeout=${_umount_timeout:-90s}
|
|
+_timed_out_umounts=""
|
|
+
|
|
umount_a() {
|
|
+ local _verbose="n"
|
|
+ if [ "$1" = "-v" ]; then
|
|
+ _verbose="y"; shift
|
|
+ exec 7>&2
|
|
+ else
|
|
+ exec 7>/dev/null
|
|
+ fi
|
|
+
|
|
local _did_umount="n"
|
|
while read a mp a || [ -n "$mp" ]; do
|
|
- if strstr "$mp" oldroot; then
|
|
- if umount "$mp"; then
|
|
- _did_umount="y"
|
|
- warn "Unmounted $mp."
|
|
- fi
|
|
+ strstr "$mp" oldroot || continue
|
|
+ strstr "$_timed_out_umounts" " $mp " && continue
|
|
+
|
|
+ # Unmount the file system. The operation uses a timeout to avoid waiting
|
|
+ # indefinitely if this is e.g. a stuck NFS mount. The command is
|
|
+ # invoked in a subshell to silence also the "Killed" message that might
|
|
+ # be produced by the shell.
|
|
+ (set +m; timeout --signal=KILL "$_umount_timeout" umount "$mp") 2>&7
|
|
+ local ret=$?
|
|
+ if [ $ret -eq 0 ]; then
|
|
+ _did_umount="y"
|
|
+ warn "Unmounted $mp."
|
|
+ elif [ $ret -eq 137 ]; then
|
|
+ _timed_out_umounts="$_timed_out_umounts $mp "
|
|
+ warn "Unmounting $mp timed out."
|
|
+ elif [ "$_verbose" = "y" ]; then
|
|
+ warn "Unmounting $mp failed with status $ret."
|
|
fi
|
|
done </proc/mounts
|
|
- losetup -D
|
|
+
|
|
+ losetup -D 2>&7
|
|
+
|
|
+ exec 7>&-
|
|
[ "$_did_umount" = "y" ] && return 0
|
|
return 1
|
|
}
|
|
|
|
_cnt=0
|
|
while [ $_cnt -le 40 ]; do
|
|
- umount_a 2>/dev/null || break
|
|
+ umount_a || break
|
|
_cnt=$(($_cnt+1))
|
|
done
|
|
|
|
-[ $_cnt -ge 40 ] && umount_a
|
|
+[ $_cnt -ge 40 ] && umount_a -v
|
|
|
|
if strstr "$(cat /proc/mounts)" "/oldroot"; then
|
|
warn "Cannot umount /oldroot"
|
|
--
|
|
2.19.1
|
|
|