summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-28 12:50:56 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-28 12:50:56 -0700
commitf70d24c230bcaa1e95f66252133068a98c895200 (patch)
tree466a2498ccdfe44a63f36253e390a2d5cb997690
parent934600daa7bcce8ad6d5efe05cce4811c8d2f464 (diff)
parent76fdb7eb4e1c91086ce9c3db6972c2ed48c96afb (diff)
Merge tag 'vfs-6.17-rc1.nsfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull namespace updates from Christian Brauner: "This contains namespace updates. This time specifically for nsfs: - Userspace heavily relies on the root inode numbers for namespaces to identify the initial namespaces. That's already a hard dependency. So we cannot change that anymore. Move the initial inode numbers to a public header and align the only two namespaces that currently don't do that with all the other namespaces. - The root inode of /proc having a fixed inode number has been part of the core kernel ABI since its inception, and recently some userspace programs (mainly container runtimes) have started to explicitly depend on this behaviour. The main reason this is useful to userspace is that by checking that a suspect /proc handle has fstype PROC_SUPER_MAGIC and is PROCFS_ROOT_INO, they can then use openat2() together with RESOLVE_{NO_{XDEV,MAGICLINK},BENEATH} to ensure that there isn't a bind-mount that replaces some procfs file with a different one. This kind of attack has lead to security issues in container runtimes in the past (such as CVE-2019-19921) and libraries like libpathrs[1] use this feature of procfs to provide safe procfs handling functions" * tag 'vfs-6.17-rc1.nsfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: uapi: export PROCFS_ROOT_INO mntns: use stable inode number for initial mount ns netns: use stable inode number for initial mount ns nsfs: move root inode number to uapi
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/proc/root.c10
-rw-r--r--include/linux/proc_ns.h16
-rw-r--r--include/uapi/linux/fs.h11
-rw-r--r--include/uapi/linux/nsfs.h11
-rw-r--r--net/core/net_namespace.c8
6 files changed, 47 insertions, 13 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 87c0061302051..ddfd4457d3383 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -6090,9 +6090,11 @@ static void __init init_mount_tree(void)
if (IS_ERR(mnt))
panic("Can't create rootfs");
- ns = alloc_mnt_ns(&init_user_ns, false);
+ ns = alloc_mnt_ns(&init_user_ns, true);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
+ ns->seq = atomic64_inc_return(&mnt_ns_seq);
+ ns->ns.inum = PROC_MNT_INIT_INO;
m = real_mount(mnt);
ns->root = m;
ns->nr_mounts = 1;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 06a297a27ba3b..ed86ac7103843 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -363,12 +363,12 @@ static const struct inode_operations proc_root_inode_operations = {
* This is the root "inode" in the /proc tree..
*/
struct proc_dir_entry proc_root = {
- .low_ino = PROC_ROOT_INO,
- .namelen = 5,
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
- .nlink = 2,
+ .low_ino = PROCFS_ROOT_INO,
+ .namelen = 5,
+ .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .nlink = 2,
.refcnt = REFCOUNT_INIT(1),
- .proc_iops = &proc_root_inode_operations,
+ .proc_iops = &proc_root_inode_operations,
.proc_dir_ops = &proc_root_operations,
.parent = &proc_root,
.subdir = RB_ROOT,
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 5ea470eb4d768..4b20375f3783e 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -6,6 +6,7 @@
#define _LINUX_PROC_NS_H
#include <linux/ns_common.h>
+#include <uapi/linux/nsfs.h>
struct pid_namespace;
struct nsset;
@@ -39,13 +40,14 @@ extern const struct proc_ns_operations timens_for_children_operations;
* We always define these enumerators
*/
enum {
- PROC_ROOT_INO = 1,
- PROC_IPC_INIT_INO = 0xEFFFFFFFU,
- PROC_UTS_INIT_INO = 0xEFFFFFFEU,
- PROC_USER_INIT_INO = 0xEFFFFFFDU,
- PROC_PID_INIT_INO = 0xEFFFFFFCU,
- PROC_CGROUP_INIT_INO = 0xEFFFFFFBU,
- PROC_TIME_INIT_INO = 0xEFFFFFFAU,
+ PROC_IPC_INIT_INO = IPC_NS_INIT_INO,
+ PROC_UTS_INIT_INO = UTS_NS_INIT_INO,
+ PROC_USER_INIT_INO = USER_NS_INIT_INO,
+ PROC_PID_INIT_INO = PID_NS_INIT_INO,
+ PROC_CGROUP_INIT_INO = CGROUP_NS_INIT_INO,
+ PROC_TIME_INIT_INO = TIME_NS_INIT_INO,
+ PROC_NET_INIT_INO = NET_NS_INIT_INO,
+ PROC_MNT_INIT_INO = MNT_NS_INIT_INO,
};
#ifdef CONFIG_PROC_FS
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 0098b0ce8ccb1..28238a3edbc1b 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -60,6 +60,17 @@
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
+/*
+ * The root inode of procfs is guaranteed to always have the same inode number.
+ * For programs that make heavy use of procfs, verifying that the root is a
+ * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH})
+ * will allow you to make sure you are never tricked into operating on the
+ * wrong procfs file.
+ */
+enum procfs_ino {
+ PROCFS_ROOT_INO = 1,
+};
+
struct file_clone_range {
__s64 src_fd;
__u64 src_offset;
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index 34127653fd007..97d8d80d139fc 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -42,4 +42,15 @@ struct mnt_ns_info {
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+enum init_ns_ino {
+ IPC_NS_INIT_INO = 0xEFFFFFFFU,
+ UTS_NS_INIT_INO = 0xEFFFFFFEU,
+ USER_NS_INIT_INO = 0xEFFFFFFDU,
+ PID_NS_INIT_INO = 0xEFFFFFFCU,
+ CGROUP_NS_INIT_INO = 0xEFFFFFFBU,
+ TIME_NS_INIT_INO = 0xEFFFFFFAU,
+ NET_NS_INIT_INO = 0xEFFFFFF9U,
+ MNT_NS_INIT_INO = 0xEFFFFFF8U,
+};
+
#endif /* __LINUX_NSFS_H */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ae54f26709ca2..03cf87d3b3801 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -796,11 +796,19 @@ static __net_init int net_ns_net_init(struct net *net)
#ifdef CONFIG_NET_NS
net->ns.ops = &netns_operations;
#endif
+ if (net == &init_net) {
+ net->ns.inum = PROC_NET_INIT_INO;
+ return 0;
+ }
return ns_alloc_inum(&net->ns);
}
static __net_exit void net_ns_net_exit(struct net *net)
{
+ /*
+ * Initial network namespace doesn't exit so we don't need any
+ * special checks here.
+ */
ns_free_inum(&net->ns);
}