From 315db8b5eca08d8301774b4b8ef702661120ff7e Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 2 Jul 2024 20:58:43 +1000 Subject: [PATCH] rootfs: try to scope MkdirAll to stay inside the rootfs While we use SecureJoin to try to make all of our target paths inside the container safe, SecureJoin is not safe against an attacker than can change the path after we "resolve" it. os.MkdirAll can inadvertently follow symlinks and thus an attacker could end up tricking runc into creating empty directories on the host (note that the container doesn't get access to these directories, and the host just sees empty directories). However, this could potentially cause DoS issues by (for instance) creating a directory in a conf.d directory for a daemon that doesn't handle subdirectories properly. In addition, the handling for creating file bind-mounts did a plain open(O_CREAT) on the SecureJoin'd path, which is even more obviously unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an attacker to cause data loss...). Regardless of the symlink issue, opening an untrusted file could result in a DoS if the file is a hung tty or some other "nasty" file. We can use mknodat to safely create a regular file without opening anything anyway (O_CREAT|O_EXCL would also work but it makes the logic a bit more complicated, and we don't want to open the file for any particular reason anyway). libpathrs[1] is the long-term solution for these kinds of problems, but for now we can patch this particular issue by creating a more restricted MkdirAll that refuses to resolve symlinks and does the creation using file descriptors. This is loosely based on a more secure version that filepath-securejoin now has[2] and will be added to libpathrs soon[3]. [1]: https://github.com/openSUSE/libpathrs [2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0 [3]: https://github.com/openSUSE/libpathrs/issues/10 Fixes: CVE-2024-45310 Signed-off-by: Aleksa Sarai --- libcontainer/rootfs_linux.go | 34 ++++---- libcontainer/system/linux.go | 44 +++++++++++ libcontainer/utils/utils_unix.go | 128 ++++++++++++++++++++++++++++++- 3 files changed, 189 insertions(+), 17 deletions(-) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index a948090..8a642b7 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux package libcontainer @@ -18,6 +19,7 @@ import ( "github.com/docker/docker/pkg/mount" "github.com/docker/docker/pkg/symlink" "github.com/mrunalp/fileutils" + "github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" @@ -163,7 +165,7 @@ func mountCmd(cmd configs.Command) error { } func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { - var err error + var err error switch m.Device { case "proc", "sysfs": @@ -187,7 +189,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { if strings.HasPrefix(m.Destination, "/proc/sys/") { return nil } - if err := os.MkdirAll(dest, 0755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil { return err } // Selinux kernels do not support labeling of /proc or /sys. @@ -195,16 +197,16 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { } var dest = m.Destination - if !strings.HasPrefix(dest, rootfs) { - dest, err = securejoin.SecureJoin(rootfs, m.Destination) - if err != nil { - return err - } - } + if !strings.HasPrefix(dest, rootfs) { + dest, err = securejoin.SecureJoin(rootfs, m.Destination) + if err != nil { + return err + } + } switch m.Device { case "mqueue": - if err := os.MkdirAll(dest, 0755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil { return err } if err := mountPropagate(m, rootfs, mountLabel); err != nil { @@ -226,7 +228,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { } m.Destination = dest if stat, err := os.Stat(dest); err != nil { - if err := os.MkdirAll(dest, 0755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil { return err } } else { @@ -283,7 +285,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { } // update the mount with the correct dest after symlinks are resolved. m.Destination = dest - if err := createIfNotExists(dest, stat.IsDir()); err != nil { + if err := createIfNotExists(rootfs, dest, stat.IsDir()); err != nil { return err } if err := mountPropagate(m, rootfs, mountLabel); err != nil { @@ -371,7 +373,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { } // update the mount with the correct dest after symlinks are resolved. m.Destination = dest - if err := os.MkdirAll(dest, 0755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil { return err } return mountPropagate(m, rootfs, mountLabel) @@ -537,7 +539,7 @@ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { if err != nil { return err } - if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0755); err != nil { return err } @@ -746,13 +748,13 @@ func msMoveRoot(rootfs string) error { } // createIfNotExists creates a file or a directory only if it does not already exist. -func createIfNotExists(path string, isDir bool) error { +func createIfNotExists(rootfs, path string, isDir bool) error { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { if isDir { - return os.MkdirAll(path, 0755) + return utils.MkdirAllInRoot(rootfs, path, 0755) } - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(path), 0755); err != nil { return err } f, err := os.OpenFile(path, os.O_CREATE, 0755) diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go index 1afc52b..9aaaada 100644 --- a/libcontainer/system/linux.go +++ b/libcontainer/system/linux.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux package system @@ -7,8 +8,12 @@ import ( "fmt" "os" "os/exec" + "runtime" + "strings" "syscall" "unsafe" + + "golang.org/x/sys/unix" ) // If arg2 is nonzero, set the "child subreaper" attribute of the @@ -134,6 +139,45 @@ func SetSubreaper(i int) error { return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) } +func prepareAt(dir *os.File, path string) (int, string) { + if dir == nil { + return unix.AT_FDCWD, path + } + + // Rather than just filepath.Join-ing path here, do it manually so the + // error and handle correctly indicate cases like path=".." as being + // relative to the correct directory. The handle.Name() might end up being + // wrong but because this is (currently) only used in MkdirAllInRoot, that + // isn't a problem. // isn't a problem. + dirName := dir.Name() + if !strings.HasSuffix(dirName, "/") { + dirName += "/" + } + fullPath := dirName + path + + return int(dir.Fd()), fullPath +} + +func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) { + dirFd, fullPath := prepareAt(dir, path) + fd, err := unix.Openat(dirFd, path, flags, mode) + if err != nil { + return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil +} + +func Mkdirat(dir *os.File, path string, mode uint32) error { + dirFd, fullPath := prepareAt(dir, path) + err := unix.Mkdirat(dirFd, path, mode) + if err != nil { + err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return err +} + func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) { _, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) if e1 != 0 { diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go index cfacfc2..a19f17c 100644 --- a/libcontainer/utils/utils_unix.go +++ b/libcontainer/utils/utils_unix.go @@ -5,17 +5,20 @@ package utils import ( + "errors" "fmt" "math" "os" "path/filepath" "runtime" "strconv" + "strings" "sync" _ "unsafe" // for go:linkname - securejoin "github.com/cyphar/filepath-securejoin" "github.com/Sirupsen/logrus" + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/opencontainers/runc/libcontainer/system" "golang.org/x/sys/unix" ) @@ -157,6 +160,20 @@ func NewSockPair(name string) (parent, child *os.File, err error) { return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil } +// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), +// but properly handling the case where path or root are "/". +// +// NOTE: The return value only make sense if the path doesn't contain "..". +func IsLexicallyInRoot(root, path string) bool { + if root != "/" { + root += "/" + } + if path != "/" { + path += "/" + } + return strings.HasPrefix(path, root) +} + // WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) // corresponding to the unsafePath resolved within the root. Before passing the // fd, this path is verified to have been inside the root -- so operating on it @@ -262,3 +279,112 @@ func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) { func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) { return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10)) } + +// MkdirAllInRootOpen attempts to make +// +// path, _ := securejoin.SecureJoin(root, unsafePath) +// os.MkdirAll(path, mode) +// os.Open(path) +// +// safer against attacks where components in the path are changed between +// SecureJoin returning and MkdirAll (or Open) being called. In particular, we +// try to detect any symlink components in the path while we are doing the +// MkdirAll. +// +// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode +// (the suid/sgid/sticky bits are not the same as for os.FileMode). +// +// NOTE: If unsafePath is a subpath of root, we assume that you have already +// called SecureJoin and so we use the provided path verbatim without resolving +// any symlinks (this is done in a way that avoids symlink-exchange races). +// This means that the path also must not contain ".." elements, otherwise an +// error will occur. +// +// This is a somewhat less safe alternative to +// , but it should +// detect attempts to trick us into creating directories outside of the root. +// We should migrate to securejoin.MkdirAll once it is merged. +func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) { + // If the path is already "within" the root, use it verbatim. + fullPath := unsafePath + if !IsLexicallyInRoot(root, unsafePath) { + var err error + fullPath, err = securejoin.SecureJoin(root, unsafePath) + if err != nil { + return nil, err + } + } + subPath, err := filepath.Rel(root, fullPath) + if err != nil { + return nil, err + } + + // Check for any silly mode bits. + if mode&^0o7777 != 0 { + return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) + } + + currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open root handle: %w", err) + } + defer func() { + if Err != nil { + currentDir.Close() + } + }() + + for _, part := range strings.Split(subPath, string(filepath.Separator)) { + switch part { + case "", ".": + // Skip over no-op components. + continue + case "..": + return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath) + } + + nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + switch { + case err == nil: + // Update the currentDir. + _ = currentDir.Close() + currentDir = nextDir + + case errors.Is(err, unix.ENOTDIR): + // This might be a symlink or some other random file. Either way, + // error out. + return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR) + + case errors.Is(err, os.ErrNotExist): + // Luckily, mkdirat will not follow trailing symlinks, so this is + // safe to do as-is. + if err := system.Mkdirat(currentDir, part, mode); err != nil { + return nil, err + } + // Open the new directory. There is a race here where an attacker + // could swap the directory with a different directory, but + // MkdirAll's fuzzy semantics mean we don't care about that. + nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open newly created directory: %w", err) + } + // Update the currentDir. + _ = currentDir.Close() + currentDir = nextDir + + default: + return nil, err + } + } + return currentDir, nil +} + +// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the +// returned handle, for callers that don't need to use it. +func MkdirAllInRoot(root, unsafePath string, mode uint32) error { + f, err := MkdirAllInRootOpen(root, unsafePath, mode) + if err == nil { + _ = f.Close() + } + return err +} -- 2.33.0