diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 183 | ||||
-rw-r--r-- | net/core/dev.h | 29 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 39 | ||||
-rw-r--r-- | net/core/netdev-genl.c | 56 | ||||
-rw-r--r-- | net/shaper/shaper.c | 6 |
5 files changed, 258 insertions, 55 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 47e6b0f73cfc..b6722ed9767a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -768,7 +768,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id) } /* must be called under rcu_read_lock(), as we dont take a reference */ -struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id) +static struct napi_struct * +netdev_napi_by_id(struct net *net, unsigned int napi_id) { struct napi_struct *napi; @@ -785,6 +786,49 @@ struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id) } /** + * netdev_napi_by_id_lock() - find a device by NAPI ID and lock it + * @net: the applicable net namespace + * @napi_id: ID of a NAPI of a target device + * + * Find a NAPI instance with @napi_id. Lock its device. + * The device must be in %NETREG_REGISTERED state for lookup to succeed. + * netdev_unlock() must be called to release it. + * + * Return: pointer to NAPI, its device with lock held, NULL if not found. + */ +struct napi_struct * +netdev_napi_by_id_lock(struct net *net, unsigned int napi_id) +{ + struct napi_struct *napi; + struct net_device *dev; + + rcu_read_lock(); + napi = netdev_napi_by_id(net, napi_id); + if (!napi || READ_ONCE(napi->dev->reg_state) != NETREG_REGISTERED) { + rcu_read_unlock(); + return NULL; + } + + dev = napi->dev; + dev_hold(dev); + rcu_read_unlock(); + + dev = __netdev_put_lock(dev); + if (!dev) + return NULL; + + rcu_read_lock(); + napi = netdev_napi_by_id(net, napi_id); + if (napi && napi->dev != dev) + napi = NULL; + rcu_read_unlock(); + + if (!napi) + netdev_unlock(dev); + return napi; +} + +/** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace * @name: name to find @@ -972,6 +1016,73 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id) return napi ? napi->dev : NULL; } +/* Release the held reference on the net_device, and if the net_device + * is still registered try to lock the instance lock. If device is being + * unregistered NULL will be returned (but the reference has been released, + * either way!) + * + * This helper is intended for locking net_device after it has been looked up + * using a lockless lookup helper. Lock prevents the instance from going away. + */ +struct net_device *__netdev_put_lock(struct net_device *dev) +{ + netdev_lock(dev); + if (dev->reg_state > NETREG_REGISTERED) { + netdev_unlock(dev); + dev_put(dev); + return NULL; + } + dev_put(dev); + return dev; +} + +/** + * netdev_get_by_index_lock() - find a device by its ifindex + * @net: the applicable net namespace + * @ifindex: index of device + * + * Search for an interface by index. If a valid device + * with @ifindex is found it will be returned with netdev->lock held. + * netdev_unlock() must be called to release it. + * + * Return: pointer to a device with lock held, NULL if not found. + */ +struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex) +{ + struct net_device *dev; + + dev = dev_get_by_index(net, ifindex); + if (!dev) + return NULL; + + return __netdev_put_lock(dev); +} + +struct net_device * +netdev_xa_find_lock(struct net *net, struct net_device *dev, + unsigned long *index) +{ + if (dev) + netdev_unlock(dev); + + do { + rcu_read_lock(); + dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT); + if (!dev) { + rcu_read_unlock(); + return NULL; + } + dev_hold(dev); + rcu_read_unlock(); + + dev = __netdev_put_lock(dev); + if (dev) + return dev; + + (*index)++; + } while (true); +} + static DEFINE_SEQLOCK(netdev_rename_lock); void netdev_copy_name(struct net_device *dev, char *name) @@ -1509,7 +1620,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { - dev->flags |= IFF_UP; + netif_set_up(dev, true); dev_set_rx_mode(dev); dev_activate(dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -1588,7 +1699,7 @@ static void __dev_close_many(struct list_head *head) if (ops->ndo_stop) ops->ndo_stop(dev); - dev->flags &= ~IFF_UP; + netif_set_up(dev, false); netpoll_poll_enable(dev); } } @@ -6674,6 +6785,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded) struct napi_struct *napi; int err = 0; + netdev_assert_locked_or_invisible(dev); + if (dev->threaded == threaded) return 0; @@ -6800,9 +6913,12 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) list_add_rcu(&napi->dev_list, higher); /* adds after higher */ } -void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) +void netif_napi_add_weight_locked(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) { + netdev_assert_locked(dev); if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) return; @@ -6841,15 +6957,17 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, */ if (dev->threaded && napi_kthread_create(napi)) dev->threaded = false; - netif_napi_set_irq(napi, -1); + netif_napi_set_irq_locked(napi, -1); } -EXPORT_SYMBOL(netif_napi_add_weight); +EXPORT_SYMBOL(netif_napi_add_weight_locked); -void napi_disable(struct napi_struct *n) +void napi_disable_locked(struct napi_struct *n) { unsigned long val, new; might_sleep(); + netdev_assert_locked(n->dev); + set_bit(NAPI_STATE_DISABLE, &n->state); val = READ_ONCE(n->state); @@ -6872,16 +6990,25 @@ void napi_disable(struct napi_struct *n) clear_bit(NAPI_STATE_DISABLE, &n->state); } -EXPORT_SYMBOL(napi_disable); +EXPORT_SYMBOL(napi_disable_locked); /** - * napi_enable - enable NAPI scheduling - * @n: NAPI context + * napi_disable() - prevent NAPI from scheduling + * @n: NAPI context * - * Resume NAPI from being scheduled on this context. - * Must be paired with napi_disable. + * Stop NAPI from being scheduled on this context. + * Waits till any outstanding processing completes. + * Takes netdev_lock() for associated net_device. */ -void napi_enable(struct napi_struct *n) +void napi_disable(struct napi_struct *n) +{ + netdev_lock(n->dev); + napi_disable_locked(n); + netdev_unlock(n->dev); +} +EXPORT_SYMBOL(napi_disable); + +void napi_enable_locked(struct napi_struct *n) { unsigned long new, val = READ_ONCE(n->state); @@ -6898,6 +7025,22 @@ void napi_enable(struct napi_struct *n) new |= NAPIF_STATE_THREADED; } while (!try_cmpxchg(&n->state, &val, new)); } +EXPORT_SYMBOL(napi_enable_locked); + +/** + * napi_enable() - enable NAPI scheduling + * @n: NAPI context + * + * Enable scheduling of a NAPI instance. + * Must be paired with napi_disable(). + * Takes netdev_lock() for associated net_device. + */ +void napi_enable(struct napi_struct *n) +{ + netdev_lock(n->dev); + napi_enable_locked(n); + netdev_unlock(n->dev); +} EXPORT_SYMBOL(napi_enable); static void flush_gro_hash(struct napi_struct *napi) @@ -6914,8 +7057,10 @@ static void flush_gro_hash(struct napi_struct *napi) } /* Must be called in process context */ -void __netif_napi_del(struct napi_struct *napi) +void __netif_napi_del_locked(struct napi_struct *napi) { + netdev_assert_locked(napi->dev); + if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; @@ -6935,7 +7080,7 @@ void __netif_napi_del(struct napi_struct *napi) napi->thread = NULL; } } -EXPORT_SYMBOL(__netif_napi_del); +EXPORT_SYMBOL(__netif_napi_del_locked); static int __napi_poll(struct napi_struct *n, bool *repoll) { @@ -10695,7 +10840,9 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_kobject(dev); + netdev_lock(dev); WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED); + netdev_unlock(dev); if (ret) goto err_uninit_notify; @@ -10969,7 +11116,9 @@ void netdev_run_todo(void) continue; } + netdev_lock(dev); WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED); + netdev_unlock(dev); linkwatch_sync_dev(dev); } @@ -11575,7 +11724,9 @@ void unregister_netdevice_many_notify(struct list_head *head, list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ unlist_netdevice(dev); + netdev_lock(dev); WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); + netdev_unlock(dev); } flush_all_backlogs(); diff --git a/net/core/dev.h b/net/core/dev.h index d8966847794c..a5b166bbd169 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -2,6 +2,7 @@ #ifndef _NET_CORE_DEV_H #define _NET_CORE_DEV_H +#include <linux/cleanup.h> #include <linux/types.h> #include <linux/rwsem.h> #include <linux/netdevice.h> @@ -22,9 +23,23 @@ struct sd_flow_limit { extern int netdev_flow_limit_table_len; -struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id); +struct napi_struct * +netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); +struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); +struct net_device *__netdev_put_lock(struct net_device *dev); +struct net_device * +netdev_xa_find_lock(struct net *net, struct net_device *dev, + unsigned long *index); + +DEFINE_FREE(netdev_unlock, struct net_device *, if (_T) netdev_unlock(_T)); + +#define for_each_netdev_lock_scoped(net, var_name, ifindex) \ + for (struct net_device *var_name __free(netdev_unlock) = NULL; \ + (var_name = netdev_xa_find_lock(net, var_name, &ifindex)); \ + ifindex++) + #ifdef CONFIG_PROC_FS int __init dev_proc_init(void); #else @@ -112,6 +127,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh); +static inline void netif_set_up(struct net_device *dev, bool value) +{ + if (value) + dev->flags |= IFF_UP; + else + dev->flags &= ~IFF_UP; + + netdev_lock(dev); + dev->up = value; + netdev_unlock(dev); +} + static inline void netif_set_gso_max_size(struct net_device *dev, unsigned int size) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2d9afc6e2161..07cb99b114bd 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -36,7 +36,7 @@ static const char fmt_uint[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; -/* Caller holds RTNL or RCU */ +/* Caller holds RTNL, netdev->lock or RCU */ static inline int dev_isalive(const struct net_device *dev) { return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; @@ -108,6 +108,36 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, return ret; } +/* Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() */ +static ssize_t +netdev_lock_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len, + int (*set)(struct net_device *, unsigned long)) +{ + struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); + unsigned long new; + int ret; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = kstrtoul(buf, 0, &new); + if (ret) + return ret; + + netdev_lock(netdev); + + if (dev_isalive(netdev)) { + ret = (*set)(netdev, new); + if (ret == 0) + ret = len; + } + netdev_unlock(netdev); + + return ret; +} + NETDEVICE_SHOW_RO(dev_id, fmt_hex); NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); @@ -420,7 +450,7 @@ static ssize_t gro_flush_timeout_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; - return netdev_store(dev, attr, buf, len, change_gro_flush_timeout); + return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout); } NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); @@ -440,7 +470,8 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; - return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs); + return netdev_lock_store(dev, attr, buf, len, + change_napi_defer_hard_irqs); } NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint); @@ -638,7 +669,7 @@ static ssize_t threaded_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, modify_napi_threaded); + return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded); } static DEVICE_ATTR_RW(threaded); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index c59619a2ec23..715f85c6b62e 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -167,7 +167,7 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, void *hdr; pid_t pid; - if (!(napi->dev->flags & IFF_UP)) + if (!napi->dev->up) return 0; hdr = genlmsg_iput(rsp, info); @@ -229,20 +229,15 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) if (!rsp) return -ENOMEM; - rtnl_lock(); - rcu_read_lock(); - - napi = netdev_napi_by_id(genl_info_net(info), napi_id); + napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); if (napi) { err = netdev_nl_napi_fill_one(rsp, napi, info); + netdev_unlock(napi->dev); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); err = -ENOENT; } - rcu_read_unlock(); - rtnl_unlock(); - if (err) { goto err_free_msg; } else if (!rsp->len) { @@ -266,7 +261,7 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, unsigned int prev_id; int err = 0; - if (!(netdev->flags & IFF_UP)) + if (!netdev->up) return err; prev_id = UINT_MAX; @@ -301,22 +296,22 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (info->attrs[NETDEV_A_NAPI_IFINDEX]) ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]); - rtnl_lock(); if (ifindex) { - netdev = __dev_get_by_index(net, ifindex); - if (netdev) + netdev = netdev_get_by_index_lock(net, ifindex); + if (netdev) { err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); - else + netdev_unlock(netdev); + } else { err = -ENODEV; + } } else { - for_each_netdev_dump(net, netdev, ctx->ifindex) { + for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); if (err < 0) break; ctx->napi_id = 0; } } - rtnl_unlock(); return err; } @@ -357,20 +352,15 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info) napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); - rtnl_lock(); - rcu_read_lock(); - - napi = netdev_napi_by_id(genl_info_net(info), napi_id); + napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id); if (napi) { err = netdev_nl_napi_set_config(napi, info); + netdev_unlock(napi->dev); } else { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]); err = -ENOENT; } - rcu_read_unlock(); - rtnl_unlock(); - return err; } @@ -442,7 +432,7 @@ netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, { int err; - if (!(netdev->flags & IFF_UP)) + if (!netdev->up) return -ENOENT; err = netdev_nl_queue_validate(netdev, q_idx, q_type); @@ -474,11 +464,13 @@ int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - netdev = __dev_get_by_index(genl_info_net(info), ifindex); - if (netdev) + netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); + if (netdev) { err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); - else + netdev_unlock(netdev); + } else { err = -ENODEV; + } rtnl_unlock(); @@ -499,7 +491,7 @@ netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, { int err = 0; - if (!(netdev->flags & IFF_UP)) + if (!netdev->up) return err; for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) { @@ -532,13 +524,15 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); if (ifindex) { - netdev = __dev_get_by_index(net, ifindex); - if (netdev) + netdev = netdev_get_by_index_lock(net, ifindex); + if (netdev) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); - else + netdev_unlock(netdev); + } else { err = -ENODEV; + } } else { - for_each_netdev_dump(net, netdev, ctx->ifindex) { + for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) { err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); if (err < 0) break; diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 15463062fe7b..7101a48bce54 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -40,7 +40,7 @@ static void net_shaper_lock(struct net_shaper_binding *binding) { switch (binding->type) { case NET_SHAPER_BINDING_TYPE_NETDEV: - mutex_lock(&binding->netdev->lock); + netdev_lock(binding->netdev); break; } } @@ -49,7 +49,7 @@ static void net_shaper_unlock(struct net_shaper_binding *binding) { switch (binding->type) { case NET_SHAPER_BINDING_TYPE_NETDEV: - mutex_unlock(&binding->netdev->lock); + netdev_unlock(binding->netdev); break; } } @@ -1398,7 +1398,7 @@ void net_shaper_set_real_num_tx_queues(struct net_device *dev, /* Only drivers implementing shapers support ensure * the lock is acquired in advance. */ - lockdep_assert_held(&dev->lock); + netdev_assert_locked(dev); /* Take action only when decreasing the tx queue number. */ for (i = txq; i < dev->real_num_tx_queues; ++i) { |