388 lines
13 KiB
Diff
388 lines
13 KiB
Diff
From 315db8b5eca08d8301774b4b8ef702661120ff7e Mon Sep 17 00:00:00 2001
|
|
From: Aleksa Sarai <cyphar@cyphar.com>
|
|
Date: Tue, 2 Jul 2024 20:58:43 +1000
|
|
Subject: [PATCH] rootfs: try to scope MkdirAll to stay inside the rootfs
|
|
|
|
While we use SecureJoin to try to make all of our target paths inside
|
|
the container safe, SecureJoin is not safe against an attacker than can
|
|
change the path after we "resolve" it.
|
|
|
|
os.MkdirAll can inadvertently follow symlinks and thus an attacker could
|
|
end up tricking runc into creating empty directories on the host (note
|
|
that the container doesn't get access to these directories, and the host
|
|
just sees empty directories). However, this could potentially cause DoS
|
|
issues by (for instance) creating a directory in a conf.d directory for
|
|
a daemon that doesn't handle subdirectories properly.
|
|
|
|
In addition, the handling for creating file bind-mounts did a plain
|
|
open(O_CREAT) on the SecureJoin'd path, which is even more obviously
|
|
unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an
|
|
attacker to cause data loss...). Regardless of the symlink issue,
|
|
opening an untrusted file could result in a DoS if the file is a hung
|
|
tty or some other "nasty" file. We can use mknodat to safely create a
|
|
regular file without opening anything anyway (O_CREAT|O_EXCL would also
|
|
work but it makes the logic a bit more complicated, and we don't want to
|
|
open the file for any particular reason anyway).
|
|
|
|
libpathrs[1] is the long-term solution for these kinds of problems, but
|
|
for now we can patch this particular issue by creating a more restricted
|
|
MkdirAll that refuses to resolve symlinks and does the creation using
|
|
file descriptors. This is loosely based on a more secure version that
|
|
filepath-securejoin now has[2] and will be added to libpathrs soon[3].
|
|
|
|
[1]: https://github.com/openSUSE/libpathrs
|
|
[2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0
|
|
[3]: https://github.com/openSUSE/libpathrs/issues/10
|
|
|
|
Fixes: CVE-2024-45310
|
|
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
|
---
|
|
libcontainer/rootfs_linux.go | 34 ++++----
|
|
libcontainer/system/linux.go | 44 +++++++++++
|
|
libcontainer/utils/utils_unix.go | 128 ++++++++++++++++++++++++++++++-
|
|
3 files changed, 189 insertions(+), 17 deletions(-)
|
|
|
|
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
|
index a948090..8a642b7 100644
|
|
--- a/libcontainer/rootfs_linux.go
|
|
+++ b/libcontainer/rootfs_linux.go
|
|
@@ -1,3 +1,4 @@
|
|
+//go:build linux
|
|
// +build linux
|
|
|
|
package libcontainer
|
|
@@ -18,6 +19,7 @@ import (
|
|
"github.com/docker/docker/pkg/mount"
|
|
"github.com/docker/docker/pkg/symlink"
|
|
"github.com/mrunalp/fileutils"
|
|
+ "github.com/opencontainers/runc/libcontainer/utils"
|
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
"github.com/opencontainers/runc/libcontainer/configs"
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
|
@@ -163,7 +165,7 @@ func mountCmd(cmd configs.Command) error {
|
|
}
|
|
|
|
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
|
- var err error
|
|
+ var err error
|
|
|
|
switch m.Device {
|
|
case "proc", "sysfs":
|
|
@@ -187,7 +189,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
|
if strings.HasPrefix(m.Destination, "/proc/sys/") {
|
|
return nil
|
|
}
|
|
- if err := os.MkdirAll(dest, 0755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil {
|
|
return err
|
|
}
|
|
// Selinux kernels do not support labeling of /proc or /sys.
|
|
@@ -195,16 +197,16 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
|
}
|
|
|
|
var dest = m.Destination
|
|
- if !strings.HasPrefix(dest, rootfs) {
|
|
- dest, err = securejoin.SecureJoin(rootfs, m.Destination)
|
|
- if err != nil {
|
|
- return err
|
|
- }
|
|
- }
|
|
+ if !strings.HasPrefix(dest, rootfs) {
|
|
+ dest, err = securejoin.SecureJoin(rootfs, m.Destination)
|
|
+ if err != nil {
|
|
+ return err
|
|
+ }
|
|
+ }
|
|
|
|
switch m.Device {
|
|
case "mqueue":
|
|
- if err := os.MkdirAll(dest, 0755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil {
|
|
return err
|
|
}
|
|
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
|
@@ -226,7 +228,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
|
}
|
|
m.Destination = dest
|
|
if stat, err := os.Stat(dest); err != nil {
|
|
- if err := os.MkdirAll(dest, 0755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
@@ -283,7 +285,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
|
}
|
|
// update the mount with the correct dest after symlinks are resolved.
|
|
m.Destination = dest
|
|
- if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
|
+ if err := createIfNotExists(rootfs, dest, stat.IsDir()); err != nil {
|
|
return err
|
|
}
|
|
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
|
@@ -371,7 +373,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
|
}
|
|
// update the mount with the correct dest after symlinks are resolved.
|
|
m.Destination = dest
|
|
- if err := os.MkdirAll(dest, 0755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, dest, 0755); err != nil {
|
|
return err
|
|
}
|
|
return mountPropagate(m, rootfs, mountLabel)
|
|
@@ -537,7 +539,7 @@ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
- if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0755); err != nil {
|
|
return err
|
|
}
|
|
|
|
@@ -746,13 +748,13 @@ func msMoveRoot(rootfs string) error {
|
|
}
|
|
|
|
// createIfNotExists creates a file or a directory only if it does not already exist.
|
|
-func createIfNotExists(path string, isDir bool) error {
|
|
+func createIfNotExists(rootfs, path string, isDir bool) error {
|
|
if _, err := os.Stat(path); err != nil {
|
|
if os.IsNotExist(err) {
|
|
if isDir {
|
|
- return os.MkdirAll(path, 0755)
|
|
+ return utils.MkdirAllInRoot(rootfs, path, 0755)
|
|
}
|
|
- if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
|
+ if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(path), 0755); err != nil {
|
|
return err
|
|
}
|
|
f, err := os.OpenFile(path, os.O_CREATE, 0755)
|
|
diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go
|
|
index 1afc52b..9aaaada 100644
|
|
--- a/libcontainer/system/linux.go
|
|
+++ b/libcontainer/system/linux.go
|
|
@@ -1,3 +1,4 @@
|
|
+//go:build linux
|
|
// +build linux
|
|
|
|
package system
|
|
@@ -7,8 +8,12 @@ import (
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
+ "runtime"
|
|
+ "strings"
|
|
"syscall"
|
|
"unsafe"
|
|
+
|
|
+ "golang.org/x/sys/unix"
|
|
)
|
|
|
|
// If arg2 is nonzero, set the "child subreaper" attribute of the
|
|
@@ -134,6 +139,45 @@ func SetSubreaper(i int) error {
|
|
return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
|
}
|
|
|
|
+func prepareAt(dir *os.File, path string) (int, string) {
|
|
+ if dir == nil {
|
|
+ return unix.AT_FDCWD, path
|
|
+ }
|
|
+
|
|
+ // Rather than just filepath.Join-ing path here, do it manually so the
|
|
+ // error and handle correctly indicate cases like path=".." as being
|
|
+ // relative to the correct directory. The handle.Name() might end up being
|
|
+ // wrong but because this is (currently) only used in MkdirAllInRoot, that
|
|
+ // isn't a problem. // isn't a problem.
|
|
+ dirName := dir.Name()
|
|
+ if !strings.HasSuffix(dirName, "/") {
|
|
+ dirName += "/"
|
|
+ }
|
|
+ fullPath := dirName + path
|
|
+
|
|
+ return int(dir.Fd()), fullPath
|
|
+}
|
|
+
|
|
+func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
|
+ dirFd, fullPath := prepareAt(dir, path)
|
|
+ fd, err := unix.Openat(dirFd, path, flags, mode)
|
|
+ if err != nil {
|
|
+ return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
|
|
+ }
|
|
+ runtime.KeepAlive(dir)
|
|
+ return os.NewFile(uintptr(fd), fullPath), nil
|
|
+}
|
|
+
|
|
+func Mkdirat(dir *os.File, path string, mode uint32) error {
|
|
+ dirFd, fullPath := prepareAt(dir, path)
|
|
+ err := unix.Mkdirat(dirFd, path, mode)
|
|
+ if err != nil {
|
|
+ err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
|
|
+ }
|
|
+ runtime.KeepAlive(dir)
|
|
+ return err
|
|
+}
|
|
+
|
|
func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
|
|
_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
|
|
if e1 != 0 {
|
|
diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go
|
|
index cfacfc2..a19f17c 100644
|
|
--- a/libcontainer/utils/utils_unix.go
|
|
+++ b/libcontainer/utils/utils_unix.go
|
|
@@ -5,17 +5,20 @@
|
|
package utils
|
|
|
|
import (
|
|
+ "errors"
|
|
"fmt"
|
|
"math"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strconv"
|
|
+ "strings"
|
|
"sync"
|
|
_ "unsafe" // for go:linkname
|
|
|
|
- securejoin "github.com/cyphar/filepath-securejoin"
|
|
"github.com/Sirupsen/logrus"
|
|
+ securejoin "github.com/cyphar/filepath-securejoin"
|
|
+ "github.com/opencontainers/runc/libcontainer/system"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
@@ -157,6 +160,20 @@ func NewSockPair(name string) (parent, child *os.File, err error) {
|
|
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
|
|
}
|
|
|
|
+// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"),
|
|
+// but properly handling the case where path or root are "/".
|
|
+//
|
|
+// NOTE: The return value only make sense if the path doesn't contain "..".
|
|
+func IsLexicallyInRoot(root, path string) bool {
|
|
+ if root != "/" {
|
|
+ root += "/"
|
|
+ }
|
|
+ if path != "/" {
|
|
+ path += "/"
|
|
+ }
|
|
+ return strings.HasPrefix(path, root)
|
|
+}
|
|
+
|
|
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
|
// corresponding to the unsafePath resolved within the root. Before passing the
|
|
// fd, this path is verified to have been inside the root -- so operating on it
|
|
@@ -262,3 +279,112 @@ func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
|
|
func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
|
|
return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
|
|
}
|
|
+
|
|
+// MkdirAllInRootOpen attempts to make
|
|
+//
|
|
+// path, _ := securejoin.SecureJoin(root, unsafePath)
|
|
+// os.MkdirAll(path, mode)
|
|
+// os.Open(path)
|
|
+//
|
|
+// safer against attacks where components in the path are changed between
|
|
+// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
|
|
+// try to detect any symlink components in the path while we are doing the
|
|
+// MkdirAll.
|
|
+//
|
|
+// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
|
|
+// (the suid/sgid/sticky bits are not the same as for os.FileMode).
|
|
+//
|
|
+// NOTE: If unsafePath is a subpath of root, we assume that you have already
|
|
+// called SecureJoin and so we use the provided path verbatim without resolving
|
|
+// any symlinks (this is done in a way that avoids symlink-exchange races).
|
|
+// This means that the path also must not contain ".." elements, otherwise an
|
|
+// error will occur.
|
|
+//
|
|
+// This is a somewhat less safe alternative to
|
|
+// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should
|
|
+// detect attempts to trick us into creating directories outside of the root.
|
|
+// We should migrate to securejoin.MkdirAll once it is merged.
|
|
+func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
|
|
+ // If the path is already "within" the root, use it verbatim.
|
|
+ fullPath := unsafePath
|
|
+ if !IsLexicallyInRoot(root, unsafePath) {
|
|
+ var err error
|
|
+ fullPath, err = securejoin.SecureJoin(root, unsafePath)
|
|
+ if err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+ }
|
|
+ subPath, err := filepath.Rel(root, fullPath)
|
|
+ if err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+
|
|
+ // Check for any silly mode bits.
|
|
+ if mode&^0o7777 != 0 {
|
|
+ return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
|
|
+ }
|
|
+
|
|
+ currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
|
|
+ if err != nil {
|
|
+ return nil, fmt.Errorf("open root handle: %w", err)
|
|
+ }
|
|
+ defer func() {
|
|
+ if Err != nil {
|
|
+ currentDir.Close()
|
|
+ }
|
|
+ }()
|
|
+
|
|
+ for _, part := range strings.Split(subPath, string(filepath.Separator)) {
|
|
+ switch part {
|
|
+ case "", ".":
|
|
+ // Skip over no-op components.
|
|
+ continue
|
|
+ case "..":
|
|
+ return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
|
|
+ }
|
|
+
|
|
+ nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
|
+ switch {
|
|
+ case err == nil:
|
|
+ // Update the currentDir.
|
|
+ _ = currentDir.Close()
|
|
+ currentDir = nextDir
|
|
+
|
|
+ case errors.Is(err, unix.ENOTDIR):
|
|
+ // This might be a symlink or some other random file. Either way,
|
|
+ // error out.
|
|
+ return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)
|
|
+
|
|
+ case errors.Is(err, os.ErrNotExist):
|
|
+ // Luckily, mkdirat will not follow trailing symlinks, so this is
|
|
+ // safe to do as-is.
|
|
+ if err := system.Mkdirat(currentDir, part, mode); err != nil {
|
|
+ return nil, err
|
|
+ }
|
|
+ // Open the new directory. There is a race here where an attacker
|
|
+ // could swap the directory with a different directory, but
|
|
+ // MkdirAll's fuzzy semantics mean we don't care about that.
|
|
+ nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
|
+ if err != nil {
|
|
+ return nil, fmt.Errorf("open newly created directory: %w", err)
|
|
+ }
|
|
+ // Update the currentDir.
|
|
+ _ = currentDir.Close()
|
|
+ currentDir = nextDir
|
|
+
|
|
+ default:
|
|
+ return nil, err
|
|
+ }
|
|
+ }
|
|
+ return currentDir, nil
|
|
+}
|
|
+
|
|
+// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
|
|
+// returned handle, for callers that don't need to use it.
|
|
+func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
|
|
+ f, err := MkdirAllInRootOpen(root, unsafePath, mode)
|
|
+ if err == nil {
|
|
+ _ = f.Close()
|
|
+ }
|
|
+ return err
|
|
+}
|
|
--
|
|
2.33.0
|
|
|