diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-10-09 20:08:09 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-10-09 20:08:10 -0700 |
commit | 09cf85ef183a5603db49d542264ddbece3258e55 (patch) | |
tree | 6862fb6f4da12be4bb50aae2df27ca5471f995d3 | |
parent | 22ee378eb6814b68664a8032f9eecd72e9b3dcda (diff) | |
parent | 99ee348e6a41cf24b334a1bb7cde87239e8e2d95 (diff) |
Merge branch 'ipv4-namespacify-ipv4-address-hash-table'
Kuniyuki Iwashima says:
====================
ipv4: Namespacify IPv4 address hash table.
This is a prep of per-net RTNL conversion for RTM_(NEW|DEL|SET)ADDR.
Currently, each IPv4 address is linked to the global hash table, and
this needs to be protected by another global lock or namespacified to
support per-net RTNL.
Adding a global lock will cause deadlock in the rtnetlink path and GC,
rtnetlink check_lifetime
|- rtnl_net_lock(net) |- acquire the global lock
|- acquire the global lock |- check ifa's netns
`- put ifa into hash table `- rtnl_net_lock(net)
so we need to namespacify the hash table.
The IPv6 one is already namespacified, let's follow that.
v2: https://lore.kernel.org/netdev/20241004195958.64396-1-kuniyu@amazon.com/
v1: https://lore.kernel.org/netdev/20241001024837.96425-1-kuniyu@amazon.com/
====================
Link: https://patch.msgid.link/20241008172906.1326-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | include/linux/inetdevice.h | 2 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 2 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 69 |
3 files changed, 42 insertions, 31 deletions
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index cb5280e6cc219..d9c690c8c80b1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -141,7 +141,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ARP_EVICT_NOCARRIER) struct in_ifaddr { - struct hlist_node hash; + struct hlist_node addr_lst; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 276f622f35168..66a4cffc44ee2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -270,5 +270,7 @@ struct netns_ipv4 { atomic_t rt_genid; siphash_key_t ip_id_key; + struct hlist_head *inet_addr_lst; + struct delayed_work addr_chk_work; }; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ab76744383cf3..7c156f85b7d25 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -119,8 +119,6 @@ struct inet_fill_args { #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) -static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; - static u32 inet_addr_hash(const struct net *net, __be32 addr) { u32 val = (__force u32) addr ^ net_hash_mix(net); @@ -133,13 +131,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) u32 hash = inet_addr_hash(net, ifa->ifa_local); ASSERT_RTNL(); - hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); + hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]); } static void inet_hash_remove(struct in_ifaddr *ifa) { ASSERT_RTNL(); - hlist_del_init_rcu(&ifa->hash); + hlist_del_init_rcu(&ifa->addr_lst); } /** @@ -186,9 +184,8 @@ struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr) u32 hash = inet_addr_hash(net, addr); struct in_ifaddr *ifa; - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) - if (ifa->ifa_local == addr && - net_eq(dev_net(ifa->ifa_dev->dev), net)) + hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst) + if (ifa->ifa_local == addr) return ifa; return NULL; @@ -227,7 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev) in_dev_hold(in_dev); ifa->ifa_dev = in_dev; - INIT_HLIST_NODE(&ifa->hash); + INIT_HLIST_NODE(&ifa->addr_lst); return ifa; } @@ -484,15 +481,12 @@ static void inet_del_ifa(struct in_device *in_dev, __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); } -static void check_lifetime(struct work_struct *work); - -static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); - static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid, struct netlink_ext_ack *extack) { struct in_ifaddr __rcu **last_primary, **ifap; struct in_device *in_dev = ifa->ifa_dev; + struct net *net = dev_net(in_dev->dev); struct in_validator_info ivi; struct in_ifaddr *ifa1; int ret; @@ -561,8 +555,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, inet_hash_insert(dev_net(in_dev->dev), ifa); - cancel_delayed_work(&check_lifetime_work); - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); + cancel_delayed_work(&net->ipv4.addr_chk_work); + queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0); /* Send message first, then call notifier. Notifier will trigger FIB update, so that @@ -708,16 +702,19 @@ static void check_lifetime(struct work_struct *work) unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; struct hlist_node *n; + struct net *net; int i; + net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); for (i = 0; i < IN4_ADDR_HSIZE; i++) { + struct hlist_head *head = &net->ipv4.inet_addr_lst[i]; bool change_needed = false; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { + hlist_for_each_entry_rcu(ifa, head, addr_lst) { unsigned long age, tstamp; u32 preferred_lft; u32 valid_lft; @@ -755,7 +752,7 @@ static void check_lifetime(struct work_struct *work) if (!change_needed) continue; rtnl_lock(); - hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { + hlist_for_each_entry_safe(ifa, n, head, addr_lst) { unsigned long age; if (ifa->ifa_flags & IFA_F_PERMANENT) @@ -804,8 +801,8 @@ static void check_lifetime(struct work_struct *work) if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, - next_sched - now); + queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, + next_sched - now); } static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, @@ -1002,9 +999,9 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, ifa->ifa_proto = new_proto; set_ifa_lifetime(ifa, valid_lft, prefered_lft); - cancel_delayed_work(&check_lifetime_work); + cancel_delayed_work(&net->ipv4.addr_chk_work); queue_delayed_work(system_power_efficient_wq, - &check_lifetime_work, 0); + &net->ipv4.addr_chk_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); } return 0; @@ -2663,14 +2660,21 @@ static struct ctl_table ctl_forward_entry[] = { static __net_init int devinet_init_net(struct net *net) { - int err; - struct ipv4_devconf *all, *dflt; #ifdef CONFIG_SYSCTL - struct ctl_table *tbl; struct ctl_table_header *forw_hdr; + struct ctl_table *tbl; #endif + struct ipv4_devconf *all, *dflt; + int err; + int i; err = -ENOMEM; + net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->ipv4.inet_addr_lst) + goto err_alloc_hash; + all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL); if (!all) goto err_alloc_all; @@ -2731,6 +2735,11 @@ static __net_init int devinet_init_net(struct net *net) net->ipv4.forw_hdr = forw_hdr; #endif + for (i = 0; i < IN4_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]); + + INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime); + net->ipv4.devconf_all = all; net->ipv4.devconf_dflt = dflt; return 0; @@ -2748,6 +2757,8 @@ err_alloc_ctl: err_alloc_dflt: kfree(all); err_alloc_all: + kfree(net->ipv4.inet_addr_lst); +err_alloc_hash: return err; } @@ -2755,7 +2766,11 @@ static __net_exit void devinet_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL const struct ctl_table *tbl; +#endif + + cancel_delayed_work_sync(&net->ipv4.addr_chk_work); +#ifdef CONFIG_SYSCTL tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, @@ -2766,6 +2781,7 @@ static __net_exit void devinet_exit_net(struct net *net) #endif kfree(net->ipv4.devconf_dflt); kfree(net->ipv4.devconf_all); + kfree(net->ipv4.inet_addr_lst); } static __net_initdata struct pernet_operations devinet_ops = { @@ -2783,16 +2799,9 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = { void __init devinet_init(void) { - int i; - - for (i = 0; i < IN4_ADDR_HSIZE; i++) - INIT_HLIST_HEAD(&inet_addr_lst[i]); - register_pernet_subsys(&devinet_ops); register_netdevice_notifier(&ip_netdev_notifier); - queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); - rtnl_af_register(&inet_af_ops); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); |