summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/core/dev.c 183
-rw-r--r-- net/core/dev.h 29
-rw-r--r-- net/core/net-sysfs.c 39
-rw-r--r-- net/core/netdev-genl.c 56
-rw-r--r-- net/shaper/shaper.c 6
5 files changed, 258 insertions, 55 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 47e6b0f73cfc..b6722ed9767a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -768,7 +768,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
}
/* must be called under rcu_read_lock(), as we don't take a reference */
-struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
+static struct napi_struct *
+netdev_napi_by_id(struct net *net, unsigned int napi_id)
{
struct napi_struct *napi;
@@ -785,6 +786,49 @@ struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
}
/**
+ * netdev_napi_by_id_lock() - find a NAPI instance by ID and lock its device
+ * @net: the applicable net namespace
+ * @napi_id: ID of a NAPI of a target device
+ *
+ * Find a NAPI instance with @napi_id and take the instance lock of its device.
+ * The device must be in %NETREG_REGISTERED state for lookup to succeed.
+ * The caller must call netdev_unlock() on the NAPI's device to release it.
+ *
+ * Return: pointer to the NAPI with its device's lock held, NULL if not found.
+ */
+struct napi_struct *
+netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
+{
+ struct napi_struct *napi;
+ struct net_device *dev;
+
+ rcu_read_lock();
+ napi = netdev_napi_by_id(net, napi_id);
+ if (!napi || READ_ONCE(napi->dev->reg_state) != NETREG_REGISTERED) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
+ dev = napi->dev;
+ dev_hold(dev); /* keep a reference while we leave the RCU section */
+ rcu_read_unlock();
+
+ dev = __netdev_put_lock(dev); /* drops the reference on both outcomes */
+ if (!dev)
+ return NULL;
+
+ rcu_read_lock(); /* look the ID up again now that the device lock is held */
+ napi = netdev_napi_by_id(net, napi_id);
+ if (napi && napi->dev != dev) /* the ID resolves to another device's NAPI */
+ napi = NULL;
+ rcu_read_unlock();
+
+ if (!napi) /* second lookup failed: do not return with the lock held */
+ netdev_unlock(dev);
+ return napi;
+}
+
+/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
* @name: name to find
@@ -972,6 +1016,73 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
return napi ? napi->dev : NULL;
}
+/* Lock the instance lock of @dev and release the reference the caller holds
+ * on it. If the device has moved past NETREG_REGISTERED (it is being
+ * unregistered) the lock is dropped again and NULL is returned; otherwise
+ * @dev is returned with the lock held. The reference is released either way!
+ *
+ * This helper is intended for locking net_device after it has been looked up
+ * using a lockless lookup helper. Lock prevents the instance from going away.
+ */
+struct net_device *__netdev_put_lock(struct net_device *dev)
+{
+ netdev_lock(dev);
+ if (dev->reg_state > NETREG_REGISTERED) {
+ netdev_unlock(dev);
+ dev_put(dev);
+ return NULL;
+ }
+ dev_put(dev); /* the held lock is what the caller relies on from here */
+ return dev;
+}
+
+/**
+ * netdev_get_by_index_lock() - find a device by its ifindex and lock it
+ * @net: the applicable net namespace
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. If a device with @ifindex is found and
+ * it is not being unregistered it is returned with netdev->lock held.
+ * netdev_unlock() must be called to release it.
+ *
+ * Return: pointer to a device with lock held, NULL if not found.
+ */
+struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return NULL;
+
+ return __netdev_put_lock(dev); /* NULL if the device is past NETREG_REGISTERED */
+}
+/* Find the first device in @net's dev_by_index table at or after *@index that
+ * can be locked, and return it with its instance lock held (NULL when none is
+ * left). If @dev is non-NULL it is unlocked first, so a caller can feed the
+ * previous result back in to step through the devices one lock at a time.
+ */
+struct net_device *
+netdev_xa_find_lock(struct net *net, struct net_device *dev,
+ unsigned long *index)
+{
+ if (dev) /* release the device handed back by the caller, if any */
+ netdev_unlock(dev);
+
+ do {
+ rcu_read_lock();
+ dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT);
+ if (!dev) { /* nothing present from *index upwards */
+ rcu_read_unlock();
+ return NULL;
+ }
+ dev_hold(dev); /* keep a reference while we leave the RCU section */
+ rcu_read_unlock();
+
+ dev = __netdev_put_lock(dev);
+ if (dev)
+ return dev;
+
+ (*index)++; /* this device could not be locked: skip it and search on */
+ } while (true);
+}
+
static DEFINE_SEQLOCK(netdev_rename_lock);
void netdev_copy_name(struct net_device *dev, char *name)
@@ -1509,7 +1620,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
else {
- dev->flags |= IFF_UP;
+ netif_set_up(dev, true);
dev_set_rx_mode(dev);
dev_activate(dev);
add_device_randomness(dev->dev_addr, dev->addr_len);
@@ -1588,7 +1699,7 @@ static void __dev_close_many(struct list_head *head)
if (ops->ndo_stop)
ops->ndo_stop(dev);
- dev->flags &= ~IFF_UP;
+ netif_set_up(dev, false);
netpoll_poll_enable(dev);
}
}
@@ -6674,6 +6785,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
struct napi_struct *napi;
int err = 0;
+ netdev_assert_locked_or_invisible(dev);
+
if (dev->threaded == threaded)
return 0;
@@ -6800,9 +6913,12 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
-void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int), int weight)
+void netif_napi_add_weight_locked(struct net_device *dev,
+ struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int),
+ int weight)
{
+ netdev_assert_locked(dev);
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
@@ -6841,15 +6957,17 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
*/
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = false;
- netif_napi_set_irq(napi, -1);
+ netif_napi_set_irq_locked(napi, -1);
}
-EXPORT_SYMBOL(netif_napi_add_weight);
+EXPORT_SYMBOL(netif_napi_add_weight_locked);
-void napi_disable(struct napi_struct *n)
+void napi_disable_locked(struct napi_struct *n)
{
unsigned long val, new;
might_sleep();
+ netdev_assert_locked(n->dev);
+
set_bit(NAPI_STATE_DISABLE, &n->state);
val = READ_ONCE(n->state);
@@ -6872,16 +6990,25 @@ void napi_disable(struct napi_struct *n)
clear_bit(NAPI_STATE_DISABLE, &n->state);
}
-EXPORT_SYMBOL(napi_disable);
+EXPORT_SYMBOL(napi_disable_locked);
/**
- * napi_enable - enable NAPI scheduling
- * @n: NAPI context
+ * napi_disable() - prevent NAPI from scheduling
+ * @n: NAPI context
*
- * Resume NAPI from being scheduled on this context.
- * Must be paired with napi_disable.
+ * Stop NAPI from being scheduled on this context.
+ * Waits until any outstanding processing completes.
+ * Takes netdev_lock() of the associated net_device around napi_disable_locked().
*/
-void napi_enable(struct napi_struct *n)
+void napi_disable(struct napi_struct *n)
+{
+ netdev_lock(n->dev);
+ napi_disable_locked(n); /* asserts that n->dev's lock is held */
+ netdev_unlock(n->dev);
+}
+EXPORT_SYMBOL(napi_disable);
+
+void napi_enable_locked(struct napi_struct *n)
{
unsigned long new, val = READ_ONCE(n->state);
@@ -6898,6 +7025,22 @@ void napi_enable(struct napi_struct *n)
new |= NAPIF_STATE_THREADED;
} while (!try_cmpxchg(&n->state, &val, new));
}
+EXPORT_SYMBOL(napi_enable_locked);
+
+/**
+ * napi_enable() - enable NAPI scheduling
+ * @n: NAPI context
+ *
+ * Enable scheduling of a NAPI instance.
+ * Must be paired with napi_disable().
+ * Takes netdev_lock() of the associated net_device around napi_enable_locked().
+ */
+void napi_enable(struct napi_struct *n)
+{
+ netdev_lock(n->dev);
+ napi_enable_locked(n); /* the _locked variant does the actual work */
+ netdev_unlock(n->dev);
+}
EXPORT_SYMBOL(napi_enable);
static void flush_gro_hash(struct napi_struct *napi)
@@ -6914,8 +7057,10 @@ static void flush_gro_hash(struct napi_struct *napi)
}
/* Must be called in process context */
-void __netif_napi_del(struct napi_struct *napi)
+void __netif_napi_del_locked(struct napi_struct *napi)
{
+ netdev_assert_locked(napi->dev);
+
if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
return;
@@ -6935,7 +7080,7 @@ void __netif_napi_del(struct napi_struct *napi)
napi->thread = NULL;
}
}
-EXPORT_SYMBOL(__netif_napi_del);
+EXPORT_SYMBOL(__netif_napi_del_locked);
static int __napi_poll(struct napi_struct *n, bool *repoll)
{
@@ -10695,7 +10840,9 @@ int register_netdevice(struct net_device *dev)
ret = netdev_register_kobject(dev);
+ netdev_lock(dev);
WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
+ netdev_unlock(dev);
if (ret)
goto err_uninit_notify;
@@ -10969,7 +11116,9 @@ void netdev_run_todo(void)
continue;
}
+ netdev_lock(dev);
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
+ netdev_unlock(dev);
linkwatch_sync_dev(dev);
}
@@ -11575,7 +11724,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
unlist_netdevice(dev);
+ netdev_lock(dev);
WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
+ netdev_unlock(dev);
}
flush_all_backlogs();
diff --git a/net/core/dev.h b/net/core/dev.h
index d8966847794c..a5b166bbd169 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -2,6 +2,7 @@
#ifndef _NET_CORE_DEV_H
#define _NET_CORE_DEV_H
+#include <linux/cleanup.h>
#include <linux/types.h>
#include <linux/rwsem.h>
#include <linux/netdevice.h>
@@ -22,9 +23,23 @@ struct sd_flow_limit {
extern int netdev_flow_limit_table_len;
-struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id);
+struct napi_struct *
+netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
+struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
+struct net_device *__netdev_put_lock(struct net_device *dev);
+struct net_device *
+netdev_xa_find_lock(struct net *net, struct net_device *dev,
+ unsigned long *index);
+
+/* Cleanup action for __free(netdev_unlock): unlock the device held in the
+ * annotated variable when it goes out of scope; a NULL variable is ignored.
+ */
+DEFINE_FREE(netdev_unlock, struct net_device *, if (_T) netdev_unlock(_T));
+
+/* Walk the devices of @net, starting the search at @ifindex, with each
+ * device's instance lock held while the loop body runs on it. Moving to the
+ * next device unlocks the previous one, and leaving the loop early unlocks
+ * the current one through __free(netdev_unlock).
+ *
+ * @ifindex must be a modifiable unsigned long lvalue; it is updated in place
+ * as the walk advances. It is parenthesized below so that expressions such as
+ * *ptr are taken by address and incremented as a whole.
+ */
+#define for_each_netdev_lock_scoped(net, var_name, ifindex) \
+ for (struct net_device *var_name __free(netdev_unlock) = NULL; \
+ (var_name = netdev_xa_find_lock(net, var_name, &(ifindex))); \
+ (ifindex)++)
+
#ifdef CONFIG_PROC_FS
int __init dev_proc_init(void);
#else
@@ -112,6 +127,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
void unregister_netdevice_many_notify(struct list_head *head,
u32 portid, const struct nlmsghdr *nlh);
+static inline void netif_set_up(struct net_device *dev, bool value)
+{ /* Record @value as the device's up state in dev->flags (IFF_UP) and dev->up. */
+ if (value)
+ dev->flags |= IFF_UP;
+ else
+ dev->flags &= ~IFF_UP;
+ /* dev->up carries the same state but is written under netdev_lock() */
+ netdev_lock(dev);
+ dev->up = value;
+ netdev_unlock(dev);
+}
+
static inline void netif_set_gso_max_size(struct net_device *dev,
unsigned int size)
{
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 2d9afc6e2161..07cb99b114bd 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -36,7 +36,7 @@ static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
-/* Caller holds RTNL or RCU */
+/* Caller holds RTNL, netdev->lock or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
@@ -108,6 +108,36 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
return ret;
}
+/* Parse @buf as an unsigned long and pass it to @set with netdev_lock() held.
+ * Same as netdev_store() but takes netdev_lock() instead of rtnl_lock().
+ * Returns @len when @set succeeds, -EPERM without CAP_NET_ADMIN in the
+ * device's user namespace, or the error from kstrtoul() or @set.
+ */
+static ssize_t
+netdev_lock_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len,
+ int (*set)(struct net_device *, unsigned long))
+{
+ struct net_device *netdev = to_net_dev(dev);
+ struct net *net = dev_net(netdev);
+ unsigned long new;
+ int ret;
+
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ ret = kstrtoul(buf, 0, &new);
+ if (ret)
+ return ret;
+
+ netdev_lock(netdev);
+
+ if (dev_isalive(netdev)) { /* only touch devices not past NETREG_REGISTERED */
+ ret = (*set)(netdev, new);
+ if (ret == 0) /* success: report the whole write as consumed */
+ ret = len;
+ }
+ netdev_unlock(netdev);
+
+ return ret; /* NOTE(review): still 0 here if the device was not alive - confirm intended */
+}
+
NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
@@ -420,7 +450,7 @@ static ssize_t gro_flush_timeout_store(struct device *dev,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return netdev_store(dev, attr, buf, len, change_gro_flush_timeout);
+ return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout);
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
@@ -440,7 +470,8 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
+ return netdev_lock_store(dev, attr, buf, len,
+ change_napi_defer_hard_irqs);
}
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);
@@ -638,7 +669,7 @@ static ssize_t threaded_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return netdev_store(dev, attr, buf, len, modify_napi_threaded);
+ return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index c59619a2ec23..715f85c6b62e 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -167,7 +167,7 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
void *hdr;
pid_t pid;
- if (!(napi->dev->flags & IFF_UP))
+ if (!napi->dev->up)
return 0;
hdr = genlmsg_iput(rsp, info);
@@ -229,20 +229,15 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
if (!rsp)
return -ENOMEM;
- rtnl_lock();
- rcu_read_lock();
-
- napi = netdev_napi_by_id(genl_info_net(info), napi_id);
+ napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
+ netdev_unlock(napi->dev);
} else {
NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
err = -ENOENT;
}
- rcu_read_unlock();
- rtnl_unlock();
-
if (err) {
goto err_free_msg;
} else if (!rsp->len) {
@@ -266,7 +261,7 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
unsigned int prev_id;
int err = 0;
- if (!(netdev->flags & IFF_UP))
+ if (!netdev->up)
return err;
prev_id = UINT_MAX;
@@ -301,22 +296,22 @@ int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (info->attrs[NETDEV_A_NAPI_IFINDEX])
ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
- rtnl_lock();
if (ifindex) {
- netdev = __dev_get_by_index(net, ifindex);
- if (netdev)
+ netdev = netdev_get_by_index_lock(net, ifindex);
+ if (netdev) {
err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
- else
+ netdev_unlock(netdev);
+ } else {
err = -ENODEV;
+ }
} else {
- for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
if (err < 0)
break;
ctx->napi_id = 0;
}
}
- rtnl_unlock();
return err;
}
@@ -357,20 +352,15 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
- rtnl_lock();
- rcu_read_lock();
-
- napi = netdev_napi_by_id(genl_info_net(info), napi_id);
+ napi = netdev_napi_by_id_lock(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_set_config(napi, info);
+ netdev_unlock(napi->dev);
} else {
NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
err = -ENOENT;
}
- rcu_read_unlock();
- rtnl_unlock();
-
return err;
}
@@ -442,7 +432,7 @@ netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
{
int err;
- if (!(netdev->flags & IFF_UP))
+ if (!netdev->up)
return -ENOENT;
err = netdev_nl_queue_validate(netdev, q_idx, q_type);
@@ -474,11 +464,13 @@ int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
- netdev = __dev_get_by_index(genl_info_net(info), ifindex);
- if (netdev)
+ netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
+ if (netdev) {
err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
- else
+ netdev_unlock(netdev);
+ } else {
err = -ENODEV;
+ }
rtnl_unlock();
@@ -499,7 +491,7 @@ netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
{
int err = 0;
- if (!(netdev->flags & IFF_UP))
+ if (!netdev->up)
return err;
for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) {
@@ -532,13 +524,15 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
if (ifindex) {
- netdev = __dev_get_by_index(net, ifindex);
- if (netdev)
+ netdev = netdev_get_by_index_lock(net, ifindex);
+ if (netdev) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
- else
+ netdev_unlock(netdev);
+ } else {
err = -ENODEV;
+ }
} else {
- for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
if (err < 0)
break;
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
index 15463062fe7b..7101a48bce54 100644
--- a/net/shaper/shaper.c
+++ b/net/shaper/shaper.c
@@ -40,7 +40,7 @@ static void net_shaper_lock(struct net_shaper_binding *binding)
{
switch (binding->type) {
case NET_SHAPER_BINDING_TYPE_NETDEV:
- mutex_lock(&binding->netdev->lock);
+ netdev_lock(binding->netdev);
break;
}
}
@@ -49,7 +49,7 @@ static void net_shaper_unlock(struct net_shaper_binding *binding)
{
switch (binding->type) {
case NET_SHAPER_BINDING_TYPE_NETDEV:
- mutex_unlock(&binding->netdev->lock);
+ netdev_unlock(binding->netdev);
break;
}
}
@@ -1398,7 +1398,7 @@ void net_shaper_set_real_num_tx_queues(struct net_device *dev,
/* Only drivers implementing shaper support ensure that
 * the lock is acquired in advance.
 */
- lockdep_assert_held(&dev->lock);
+ netdev_assert_locked(dev);
/* Take action only when decreasing the tx queue number. */
for (i = txq; i < dev->real_num_tx_queues; ++i) {