summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Christian Brauner <brauner@kernel.org> 2025-09-19 10:16:49 +0200
committer: Christian Brauner <brauner@kernel.org> 2025-09-19 16:22:38 +0200
commit: 1f84344c8d83bb867d142608cf543b80bc74b7a2 (patch)
tree: 537217bd05efdba7244997dc3e246c81c6723c10
parent: bb57289f0ce1bab7c9ea2106a29088088dc95229 (diff)
parent: 024596a4e2802e457a9f92af79f246fa9631f8de (diff)
Merge patch series "ns: rework reference counting"
Christian Brauner <brauner@kernel.org> says:

Stop open accesses to the reference counts and cargo-culting the same code in all namespace. Use a set of dedicated helpers and make the actual count private.

* patches from https://lore.kernel.org/20250918-work-namespace-ns_ref-v1-0-1b0a98ee041e@kernel.org:
  ns: rename to __ns_ref
  nsfs: port to ns_ref_*() helpers
  net: port to ns_ref_*() helpers
  uts: port to ns_ref_*() helpers
  ipv4: use check_net()
  net: use check_net()
  net-sysfs: use check_net()
  user: port to ns_ref_*() helpers
  time: port to ns_ref_*() helpers
  pid: port to ns_ref_*() helpers
  ipc: port to ns_ref_*() helpers
  cgroup: port to ns_ref_*() helpers
  mnt: port to ns_ref_*() helpers
  ns: add reference count helpers

Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/mount.h2
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nsfs.c2
-rw-r--r--include/linux/cgroup_namespace.h4
-rw-r--r--include/linux/ipc_namespace.h4
-rw-r--r--include/linux/ns_common.h47
-rw-r--r--include/linux/pid_namespace.h2
-rw-r--r--include/linux/time_namespace.h4
-rw-r--r--include/linux/user_namespace.h4
-rw-r--r--include/linux/uts_namespace.h4
-rw-r--r--include/net/net_namespace.h8
-rw-r--r--init/version-timestamp.c2
-rw-r--r--ipc/msgutil.c2
-rw-r--r--ipc/namespace.c2
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/nscommon.c2
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/pid_namespace.c4
-rw-r--r--kernel/time/namespace.c2
-rw-r--r--kernel/user.c2
-rw-r--r--kernel/user_namespace.c2
-rw-r--r--net/core/net-sysfs.c6
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/ipv4/inet_timewait_sock.c4
-rw-r--r--net/ipv4/tcp_metrics.c2
25 files changed, 74 insertions, 49 deletions
diff --git a/fs/mount.h b/fs/mount.h
index 76bf863c9ae2..79c85639a7ba 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -143,7 +143,7 @@ static inline void detach_mounts(struct dentry *dentry)
static inline void get_mnt_ns(struct mnt_namespace *ns)
{
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
}
extern seqlock_t mount_lock;
diff --git a/fs/namespace.c b/fs/namespace.c
index b9f94769ec11..740a6ba524d0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2110,7 +2110,7 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
* the mount namespace and it might already be on its
* deathbed.
*/
- if (!refcount_inc_not_zero(&mntns->ns.count))
+ if (!ns_ref_get(mntns))
continue;
return mntns;
@@ -6015,7 +6015,7 @@ struct mnt_namespace init_mnt_ns = {
.ns.inum = PROC_MNT_INIT_INO,
.ns.ops = &mntns_operations,
.user_ns = &init_user_ns,
- .ns.count = REFCOUNT_INIT(1),
+ .ns.__ns_ref = REFCOUNT_INIT(1),
.passive = REFCOUNT_INIT(1),
.mounts = RB_ROOT,
.poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),
@@ -6084,7 +6084,7 @@ void __init mnt_init(void)
void put_mnt_ns(struct mnt_namespace *ns)
{
- if (!refcount_dec_and_test(&ns->ns.count))
+ if (!ns_ref_put(ns))
return;
namespace_lock();
emptied_ns = ns;
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8484bc4dd3de..dc0a4404b971 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -492,7 +492,7 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
VFS_WARN_ON_ONCE(ns->ops->type != fid->ns_type);
VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum);
- if (!refcount_inc_not_zero(&ns->count))
+ if (!__ns_ref_get(ns))
return NULL;
}
diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h
index c02bb76c5e32..b7dbf4d623d2 100644
--- a/include/linux/cgroup_namespace.h
+++ b/include/linux/cgroup_namespace.h
@@ -29,12 +29,12 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
static inline void get_cgroup_ns(struct cgroup_namespace *ns)
{
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
}
static inline void put_cgroup_ns(struct cgroup_namespace *ns)
{
- if (refcount_dec_and_test(&ns->ns.count))
+ if (ns_ref_put(ns))
free_cgroup_ns(ns);
}
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 924e4754374f..21eff63f47da 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -140,14 +140,14 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags,
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
if (ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
{
if (ns) {
- if (refcount_inc_not_zero(&ns->ns.count))
+ if (ns_ref_get(ns))
return ns;
}
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 19833ac547f9..aea8528d799a 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -29,7 +29,7 @@ struct ns_common {
struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
- refcount_t count;
+ refcount_t __ns_ref; /* do not use directly */
union {
struct {
u64 ns_id;
@@ -43,16 +43,24 @@ struct ns_common {
int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum);
void __ns_common_free(struct ns_common *ns);
-#define to_ns_common(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &(__ns)->ns, \
- struct ipc_namespace *: &(__ns)->ns, \
- struct mnt_namespace *: &(__ns)->ns, \
- struct net *: &(__ns)->ns, \
- struct pid_namespace *: &(__ns)->ns, \
- struct time_namespace *: &(__ns)->ns, \
- struct user_namespace *: &(__ns)->ns, \
- struct uts_namespace *: &(__ns)->ns)
+#define to_ns_common(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(__ns)->ns, \
+ const struct cgroup_namespace *: &(__ns)->ns, \
+ struct ipc_namespace *: &(__ns)->ns, \
+ const struct ipc_namespace *: &(__ns)->ns, \
+ struct mnt_namespace *: &(__ns)->ns, \
+ const struct mnt_namespace *: &(__ns)->ns, \
+ struct net *: &(__ns)->ns, \
+ const struct net *: &(__ns)->ns, \
+ struct pid_namespace *: &(__ns)->ns, \
+ const struct pid_namespace *: &(__ns)->ns, \
+ struct time_namespace *: &(__ns)->ns, \
+ const struct time_namespace *: &(__ns)->ns, \
+ struct user_namespace *: &(__ns)->ns, \
+ const struct user_namespace *: &(__ns)->ns, \
+ struct uts_namespace *: &(__ns)->ns, \
+ const struct uts_namespace *: &(__ns)->ns)
#define ns_init_inum(__ns) \
_Generic((__ns), \
@@ -83,4 +91,21 @@ void __ns_common_free(struct ns_common *ns);
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
+static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
+{
+ return refcount_dec_and_test(&ns->__ns_ref);
+}
+
+static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
+{
+ return refcount_inc_not_zero(&ns->__ns_ref);
+}
+
+#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns)))
+#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns)))
+#define ns_ref_put_and_lock(__ns, __lock) \
+ refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
+
#endif
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index ba0efc8c8596..5b2f29d369c4 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -62,7 +62,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
if (ns != &init_pid_ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index a47a4ce4183e..f3b9567cf1f4 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -44,7 +44,7 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
@@ -57,7 +57,7 @@ struct page *find_timens_vvar_page(struct vm_area_struct *vma);
static inline void put_time_ns(struct time_namespace *ns)
{
- if (refcount_dec_and_test(&ns->ns.count))
+ if (ns_ref_put(ns))
free_time_ns(ns);
}
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index a09056ad090e..9a9aebbf96b9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -176,7 +176,7 @@ static inline struct user_namespace *to_user_ns(struct ns_common *ns)
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
return ns;
}
@@ -186,7 +186,7 @@ extern void __put_user_ns(struct user_namespace *ns);
static inline void put_user_ns(struct user_namespace *ns)
{
- if (ns && refcount_dec_and_test(&ns->ns.count))
+ if (ns && ns_ref_put(ns))
__put_user_ns(ns);
}
diff --git a/include/linux/uts_namespace.h b/include/linux/uts_namespace.h
index c2b619bb4e57..23b4f0e1b338 100644
--- a/include/linux/uts_namespace.h
+++ b/include/linux/uts_namespace.h
@@ -25,7 +25,7 @@ static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
static inline void get_uts_ns(struct uts_namespace *ns)
{
- refcount_inc(&ns->ns.count);
+ ns_ref_inc(ns);
}
extern struct uts_namespace *copy_utsname(unsigned long flags,
@@ -34,7 +34,7 @@ extern void free_uts_ns(struct uts_namespace *ns);
static inline void put_uts_ns(struct uts_namespace *ns)
{
- if (refcount_dec_and_test(&ns->ns.count))
+ if (ns_ref_put(ns))
free_uts_ns(ns);
}
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fd090ceb80bf..3e7c825e5810 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -270,7 +270,7 @@ static inline struct net *to_net_ns(struct ns_common *ns)
/* Try using get_net_track() instead */
static inline struct net *get_net(struct net *net)
{
- refcount_inc(&net->ns.count);
+ ns_ref_inc(net);
return net;
}
@@ -281,7 +281,7 @@ static inline struct net *maybe_get_net(struct net *net)
* exists. If the reference count is zero this
* function fails and returns NULL.
*/
- if (!refcount_inc_not_zero(&net->ns.count))
+ if (!ns_ref_get(net))
net = NULL;
return net;
}
@@ -289,7 +289,7 @@ static inline struct net *maybe_get_net(struct net *net)
/* Try using put_net_track() instead */
static inline void put_net(struct net *net)
{
- if (refcount_dec_and_test(&net->ns.count))
+ if (ns_ref_put(net))
__put_net(net);
}
@@ -301,7 +301,7 @@ int net_eq(const struct net *net1, const struct net *net2)
static inline int check_net(const struct net *net)
{
- return refcount_read(&net->ns.count) != 0;
+ return ns_ref_read(net) != 0;
}
void net_drop_ns(void *);
diff --git a/init/version-timestamp.c b/init/version-timestamp.c
index 043cbf80a766..547e522e6016 100644
--- a/init/version-timestamp.c
+++ b/init/version-timestamp.c
@@ -8,7 +8,7 @@
#include <linux/utsname.h>
struct uts_namespace init_uts_ns = {
- .ns.count = REFCOUNT_INIT(2),
+ .ns.__ns_ref = REFCOUNT_INIT(2),
.name = {
.sysname = UTS_SYSNAME,
.nodename = UTS_NODENAME,
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index bbf61275df41..d0f7dcf4c208 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -27,7 +27,7 @@ DEFINE_SPINLOCK(mq_lock);
* and not CONFIG_IPC_NS.
*/
struct ipc_namespace init_ipc_ns = {
- .ns.count = REFCOUNT_INIT(1),
+ .ns.__ns_ref = REFCOUNT_INIT(1),
.user_ns = &init_user_ns,
.ns.inum = PROC_IPC_INIT_INO,
#ifdef CONFIG_IPC_NS
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 09d261a1a2aa..bd85d1c9d2c2 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -199,7 +199,7 @@ static void free_ipc(struct work_struct *unused)
*/
void put_ipc_ns(struct ipc_namespace *ns)
{
- if (refcount_dec_and_lock(&ns->ns.count, &mq_lock)) {
+ if (ns_ref_put_and_lock(ns, &mq_lock)) {
mq_clear_sbinfo(ns);
spin_unlock(&mq_lock);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 092e6bf081ed..a0e24adceef0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -219,7 +219,7 @@ static bool have_favordynmods __ro_after_init = IS_ENABLED(CONFIG_CGROUP_FAVOR_D
/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
- .ns.count = REFCOUNT_INIT(2),
+ .ns.__ns_ref = REFCOUNT_INIT(2),
.user_ns = &init_user_ns,
.ns.ops = &cgroupns_operations,
.ns.inum = PROC_CGROUP_INIT_INO,
diff --git a/kernel/nscommon.c b/kernel/nscommon.c
index 7c1b07e2a6c9..7aa2be6a0c32 100644
--- a/kernel/nscommon.c
+++ b/kernel/nscommon.c
@@ -5,7 +5,7 @@
int __ns_common_init(struct ns_common *ns, const struct proc_ns_operations *ops, int inum)
{
- refcount_set(&ns->count, 1);
+ refcount_set(&ns->__ns_ref, 1);
ns->stashed = NULL;
ns->ops = ops;
ns->ns_id = 0;
diff --git a/kernel/pid.c b/kernel/pid.c
index c45a28c16cd2..e222426f745d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -71,7 +71,7 @@ static int pid_max_max = PID_MAX_LIMIT;
* the scheme scales to up to 4 million PIDs, runtime.
*/
struct pid_namespace init_pid_ns = {
- .ns.count = REFCOUNT_INIT(2),
+ .ns.__ns_ref = REFCOUNT_INIT(2),
.idr = IDR_INIT(init_pid_ns.idr),
.pid_allocated = PIDNS_ADDING,
.level = 0,
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 27e2dd9ee051..162f5fb63d75 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -169,7 +169,7 @@ static void destroy_pid_namespace_work(struct work_struct *work)
parent = ns->parent;
destroy_pid_namespace(ns);
ns = parent;
- } while (ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count));
+ } while (ns != &init_pid_ns && ns_ref_put(ns));
}
struct pid_namespace *copy_pid_ns(unsigned long flags,
@@ -184,7 +184,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags,
void put_pid_ns(struct pid_namespace *ns)
{
- if (ns && ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count))
+ if (ns && ns != &init_pid_ns && ns_ref_put(ns))
schedule_work(&ns->work);
}
EXPORT_SYMBOL_GPL(put_pid_ns);
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index d49c73015d6e..d70bdfb7b001 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -480,7 +480,7 @@ const struct proc_ns_operations timens_for_children_operations = {
};
struct time_namespace init_time_ns = {
- .ns.count = REFCOUNT_INIT(3),
+ .ns.__ns_ref = REFCOUNT_INIT(3),
.user_ns = &init_user_ns,
.ns.inum = PROC_TIME_INIT_INO,
.ns.ops = &timens_operations,
diff --git a/kernel/user.c b/kernel/user.c
index f46b1d41163b..17a742fb4e10 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -65,7 +65,7 @@ struct user_namespace init_user_ns = {
.nr_extents = 1,
},
},
- .ns.count = REFCOUNT_INIT(3),
+ .ns.__ns_ref = REFCOUNT_INIT(3),
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
.ns.inum = PROC_USER_INIT_INO,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 32406bcab526..f9df45c46235 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -225,7 +225,7 @@ static void free_user_ns(struct work_struct *work)
kfree_rcu(ns, ns.ns_rcu);
dec_user_namespaces(ucounts);
ns = parent;
- } while (refcount_dec_and_test(&parent->ns.count));
+ } while (ns_ref_put(parent));
}
void __put_user_ns(struct user_namespace *ns)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c28cd6665444..3c2dc4c5e683 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1328,7 +1328,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
struct netdev_rx_queue *queue = &dev->_rx[i];
struct kobject *kobj = &queue->kobj;
- if (!refcount_read(&dev_net(dev)->ns.count))
+ if (!check_net(dev_net(dev)))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -2061,7 +2061,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
- if (!refcount_read(&dev_net(dev)->ns.count))
+ if (!check_net(dev_net(dev)))
queue->kobj.uevent_suppress = 1;
if (netdev_uses_bql(dev))
@@ -2315,7 +2315,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
{
struct device *dev = &ndev->dev;
- if (!refcount_read(&dev_net(ndev)->ns.count))
+ if (!check_net(dev_net(ndev)))
dev_set_uevent_suppress(dev, 1);
kobject_get(&dev->kobj);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a6a3de56a81c..d5e3fd819163 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -315,7 +315,7 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
int id;
- if (refcount_read(&net->ns.count) == 0)
+ if (!check_net(net))
return NETNSA_NSID_NOT_ASSIGNED;
spin_lock(&net->nsid_lock);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 875ff923a8ed..56a117560c0c 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -329,13 +329,13 @@ restart:
TCPF_NEW_SYN_RECV))
continue;
- if (refcount_read(&sock_net(sk)->ns.count))
+ if (check_net(sock_net(sk)))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
- if (refcount_read(&sock_net(sk)->ns.count)) {
+ if (check_net(sock_net(sk))) {
sock_gen_put(sk);
goto restart;
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03c068ea27b6..b67f94c60f9f 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -912,7 +912,7 @@ static void tcp_metrics_flush_all(struct net *net)
spin_lock_bh(&tcp_metrics_lock);
for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
match = net ? net_eq(tm_net(tm), net) :
- !refcount_read(&tm_net(tm)->ns.count);
+ !check_net(tm_net(tm));
if (match) {
rcu_assign_pointer(*pp, tm->tcpm_next);
kfree_rcu(tm, rcu_head);