diff options
author | Florian Westphal <fw@strlen.de> | 2025-08-01 17:25:08 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-08-20 18:30:17 +0200 |
commit | a2cb4df7872de069f809de2f076ec8e54d649fe3 (patch) | |
tree | 5b8d8efd17168a2cdc97192cec927ba59b6a6e14 | |
parent | 5e95347bf4ca35336fc4107537bec46af784d8d6 (diff) |
netfilter: ctnetlink: fix refcount leak on table dump
[ Upstream commit de788b2e6227462b6dcd0e07474e72c089008f74 ]
There is a reference count leak in ctnetlink_dump_table():
if (res < 0) {
nf_conntrack_get(&ct->ct_general); // HERE
cb->args[1] = (unsigned long)ct;
...
While its very unlikely, its possible that ct == last.
If this happens, then the refcount of ct was already incremented.
This 2nd increment is never undone.
This prevents the conntrack object from being released, which in turn
keeps prevents cnet->count from dropping back to 0.
This will then block the netns dismantle (or conntrack rmmod) as
nf_conntrack_cleanup_net_list() will wait forever.
This can be reproduced by running conntrack_resize.sh selftest in a loop.
It takes ~20 minutes for me on a preemptible kernel on average before
I see a runaway kworker spinning in nf_conntrack_cleanup_net_list.
One fix would to change this to:
if (res < 0) {
if (ct != last)
nf_conntrack_get(&ct->ct_general);
But this reference counting isn't needed in the first place.
We can just store a cookie value instead.
A followup patch will do the same for ctnetlink_exp_dump_table,
it looks to me as if this has the same problem and like
ctnetlink_dump_table, we only need a 'skip hint', not the actual
object so we can apply the same cookie strategy there as well.
Fixes: d205dc40798d ("[NETFILTER]: ctnetlink: fix deadlock in table dumping")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6a1239433830..18a91c031554 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -860,8 +860,6 @@ errout: static int ctnetlink_done(struct netlink_callback *cb) { - if (cb->args[1]) - nf_ct_put((struct nf_conn *)cb->args[1]); kfree(cb->data); return 0; } @@ -1184,19 +1182,26 @@ ignore_entry: return 0; } +static unsigned long ctnetlink_get_id(const struct nf_conn *ct) +{ + unsigned long id = nf_ct_get_id(ct); + + return id ? id : 1; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0; struct net *net = sock_net(skb->sk); - struct nf_conn *ct, *last; + unsigned long last_id = cb->args[1]; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *nf_ct_evict[8]; + struct nf_conn *ct; int res, i; spinlock_t *lockp; - last = (struct nf_conn *)cb->args[1]; i = 0; local_bh_disable(); @@ -1233,7 +1238,7 @@ restart: continue; if (cb->args[1]) { - if (ct != last) + if (ctnetlink_get_id(ct) != last_id) continue; cb->args[1] = 0; } @@ -1246,8 +1251,7 @@ restart: NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, true, flags); if (res < 0) { - nf_conntrack_get(&ct->ct_general); - cb->args[1] = (unsigned long)ct; + cb->args[1] = ctnetlink_get_id(ct); spin_unlock(lockp); goto out; } @@ -1260,12 +1264,10 @@ restart: } out: local_bh_enable(); - if (last) { + if (last_id) { /* nf ct hash resize happened, now clear the leftover. */ - if ((struct nf_conn *)cb->args[1] == last) + if (cb->args[1] == last_id) cb->args[1] = 0; - - nf_ct_put(last); } while (i) { |