diff options
41 files changed, 1046 insertions, 489 deletions
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 1d37038e9fbe..d235a7364893 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -210,49 +210,55 @@ packets is preferred. struct net_device synchronization rules ======================================= ndo_open: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process ndo_stop: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process Note: netif_running() is guaranteed false ndo_do_ioctl: Synchronization: rtnl_lock() semaphore. - Context: process - This is only called by network subsystems internally, - not by user space calling ioctl as it was in before - linux-5.14. + This is only called by network subsystems internally, + not by user space calling ioctl as it was in before + linux-5.14. ndo_siocbond: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process - Used by the bonding driver for the SIOCBOND family of - ioctl commands. + Used by the bonding driver for the SIOCBOND family of + ioctl commands. ndo_siocwandev: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process Used by the drivers/net/wan framework to handle the SIOCWANDEV ioctl with the if_settings structure. ndo_siocdevprivate: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process This is used to implement SIOCDEVPRIVATE ioctl helpers. These should not be added to new drivers, so don't use. ndo_eth_ioctl: - Synchronization: rtnl_lock() semaphore. + Synchronization: rtnl_lock() semaphore. In addition, netdev instance + lock if the driver implements queue management or shaper API. Context: process ndo_get_stats: - Synchronization: rtnl_lock() semaphore, or RCU. + Synchronization: RCU (can be called concurrently with the stats + update path). Context: atomic (can't sleep under RCU) ndo_start_xmit: @@ -284,6 +290,10 @@ ndo_set_rx_mode: Synchronization: netif_addr_lock spinlock. Context: BHs disabled +Most ndo callbacks not specified in the list above are running +under ``rtnl_lock``. In addition, netdev instance lock is taken as well if +the driver implements queue management or shaper API. + struct napi_struct synchronization rules ======================================== napi->poll: @@ -298,6 +308,35 @@ napi->poll: softirq will be called with interrupts disabled by netconsole. +struct netdev_queue_mgmt_ops synchronization rules +================================================== + +All queue management ndo callbacks are holding netdev instance lock. + +RTNL and netdev instance lock +============================= + +Historically, all networking control operations were protected by a single +global lock known as ``rtnl_lock``. There is an ongoing effort to replace this +global lock with separate locks for each network namespace. Additionally, +properties of individual netdev are increasingly protected by per-netdev locks. + +For device drivers that implement shaping or queue management APIs, all control +operations will be performed under the netdev instance lock. Currently, this +instance lock is acquired within the context of ``rtnl_lock``. The drivers +can also explicitly request instance lock to be acquired via +``request_ops_lock``. In the future, there will be an option for individual +drivers to opt out of using ``rtnl_lock`` and instead perform their control +operations directly under the netdev instance lock. + +Devices drivers are encouraged to rely on the instance lock where possible. + +For the (mostly software) drivers that need to interact with the core stack, +there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., +``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle +acquiring the instance lock themselves, while the ``netif_xxx`` functions +assume that the driver has already acquired the instance lock. + NETDEV_INTERNAL symbol namespace ================================ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe7072336e8f..cf0b02720dd8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -855,7 +855,6 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) { const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - int (*ioctl)(struct net_device *, struct ifreq *, int); struct ifreq ifr; struct mii_ioctl_data *mii; @@ -871,8 +870,7 @@ static int bond_check_dev_link(struct bonding *bond, BMSR_LSTATUS : 0; /* Ethtool can't be used, fallback to MII ioctls. */ - ioctl = slave_ops->ndo_eth_ioctl; - if (ioctl) { + if (slave_ops->ndo_eth_ioctl) { /* TODO: set pointer to correct ioctl on a per team member * bases to make this more efficient. that is, once * we determine the correct ioctl, we will always @@ -888,9 +886,10 @@ static int bond_check_dev_link(struct bonding *bond, /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ); mii = if_mii(&ifr); - if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) { + + if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) { mii->reg_num = MII_BMSR; - if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0) + if (dev_eth_ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0) return mii->val_out & BMSR_LSTATUS; } } @@ -2645,10 +2644,13 @@ static int __bond_release_one(struct net_device *bond_dev, dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); } - if (unregister) + if (unregister) { + netdev_lock_ops(slave_dev); __dev_set_mtu(slave_dev, slave->original_mtu); - else + netdev_unlock_ops(slave_dev); + } else { dev_set_mtu(slave_dev, slave->original_mtu); + } if (!netif_is_bond_master(slave_dev)) slave_dev->priv_flags &= ~IFF_BONDING; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 15c57a06ecaf..1a1e6da77777 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5246,8 +5246,10 @@ static void bnxt_free_ntp_fltrs(struct bnxt *bp, bool all) { int i; - /* Under rtnl_lock and all our NAPIs have been disabled. It's - * safe to delete the hash table. + netdev_assert_locked(bp->dev); + + /* Under netdev instance lock and all our NAPIs have been disabled. + * It's safe to delete the hash table. */ for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) { struct hlist_head *head; @@ -11508,7 +11510,7 @@ static int bnxt_request_irq(struct bnxt *bp) if (rc) break; - netif_napi_set_irq(&bp->bnapi[i]->napi, irq->vector); + netif_napi_set_irq_locked(&bp->bnapi[i]->napi, irq->vector); irq->requested = 1; if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) { @@ -11557,9 +11559,9 @@ static void bnxt_del_napi(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; - __netif_napi_del(&bnapi->napi); + __netif_napi_del_locked(&bnapi->napi); } - /* We called __netif_napi_del(), we need + /* We called __netif_napi_del_locked(), we need * to respect an RCU grace period before freeing napi structures. */ synchronize_net(); @@ -11578,12 +11580,12 @@ static void bnxt_init_napi(struct bnxt *bp) cp_nr_rings--; for (i = 0; i < cp_nr_rings; i++) { bnapi = bp->bnapi[i]; - netif_napi_add_config(bp->dev, &bnapi->napi, poll_fn, - bnapi->index); + netif_napi_add_config_locked(bp->dev, &bnapi->napi, poll_fn, + bnapi->index); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bnapi = bp->bnapi[cp_nr_rings]; - netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll_nitroa0); + netif_napi_add_locked(bp->dev, &bnapi->napi, bnxt_poll_nitroa0); } } @@ -11604,7 +11606,7 @@ static void bnxt_disable_napi(struct bnxt *bp) cpr->sw_stats->tx.tx_resets++; if (bnapi->in_reset) cpr->sw_stats->rx.rx_resets++; - napi_disable(&bnapi->napi); + napi_disable_locked(&bnapi->napi); } } @@ -11626,7 +11628,7 @@ static void bnxt_enable_napi(struct bnxt *bp) INIT_WORK(&cpr->dim.work, bnxt_dim_work); cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } - napi_enable(&bnapi->napi); + napi_enable_locked(&bnapi->napi); } } @@ -12789,7 +12791,6 @@ open_err_free_mem: return rc; } -/* rtnl_lock held */ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -12805,9 +12806,9 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) return rc; } -/* rtnl_lock held, open the NIC half way by allocating all resources, but - * NAPI, IRQ, and TX are not enabled. This is mainly used for offline - * self tests. +/* netdev instance lock held, open the NIC half way by allocating all + * resources, but NAPI, IRQ, and TX are not enabled. This is mainly used + * for offline self tests. */ int bnxt_half_open_nic(struct bnxt *bp) { @@ -12842,8 +12843,8 @@ half_open_err: return rc; } -/* rtnl_lock held, this call can only be made after a previous successful - * call to bnxt_half_open_nic(). +/* netdev instance lock held, this call can only be made after a previous + * successful call to bnxt_half_open_nic(). */ void bnxt_half_close_nic(struct bnxt *bp) { @@ -12952,10 +12953,11 @@ void bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { /* If we get here, it means firmware reset is in progress * while we are trying to close. We can safely proceed with - * the close because we are holding rtnl_lock(). Some firmware - * messages may fail as we proceed to close. We set the - * ABORT_ERR flag here so that the FW reset thread will later - * abort when it gets the rtnl_lock() and sees the flag. + * the close because we are holding netdev instance lock. + * Some firmware messages may fail as we proceed to close. + * We set the ABORT_ERR flag here so that the FW reset thread + * will later abort when it gets the netdev instance lock + * and sees the flag. */ netdev_warn(bp->dev, "FW reset in progress during close, FW reset will be aborted\n"); set_bit(BNXT_STATE_ABORT_ERR, &bp->state); @@ -13046,7 +13048,7 @@ static int bnxt_hwrm_port_phy_write(struct bnxt *bp, u16 phy_addr, u16 reg, return hwrm_req_send(bp, req); } -/* rtnl_lock held */ +/* netdev instance lock held */ static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *mdio = if_mii(ifr); @@ -13965,30 +13967,31 @@ bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); } -static void bnxt_rtnl_lock_sp(struct bnxt *bp) +static void bnxt_lock_sp(struct bnxt *bp) { /* We are called from bnxt_sp_task which has BNXT_STATE_IN_SP_TASK * set. If the device is being closed, bnxt_close() may be holding - * rtnl() and waiting for BNXT_STATE_IN_SP_TASK to clear. So we - * must clear BNXT_STATE_IN_SP_TASK before holding rtnl(). + * netdev instance lock and waiting for BNXT_STATE_IN_SP_TASK to clear. + * So we must clear BNXT_STATE_IN_SP_TASK before holding netdev + * instance lock. */ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_lock(); + netdev_lock(bp->dev); } -static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +static void bnxt_unlock_sp(struct bnxt *bp) { set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); - rtnl_unlock(); + netdev_unlock(bp->dev); } /* Only called from bnxt_sp_task() */ static void bnxt_reset(struct bnxt *bp, bool silent) { - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_reset_task(bp, silent); - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } /* Only called from bnxt_sp_task() */ @@ -13996,9 +13999,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) { int i; - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); return; } /* Disable and flush TPA before resetting the RX ring */ @@ -14037,7 +14040,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, true); - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } static void bnxt_fw_fatal_close(struct bnxt *bp) @@ -14093,7 +14096,7 @@ static bool is_bnxt_fw_ok(struct bnxt *bp) return false; } -/* rtnl_lock is acquired before calling this function */ +/* netdev instance lock is acquired before calling this function */ static void bnxt_force_fw_reset(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; @@ -14136,9 +14139,9 @@ void bnxt_fw_exception(struct bnxt *bp) netdev_warn(bp->dev, "Detected firmware fatal condition, initiating reset\n"); set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); bnxt_ulp_stop(bp); - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); bnxt_force_fw_reset(bp); - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } /* Returns the number of registered VFs, or 1 if VF configuration is pending, or @@ -14168,7 +14171,7 @@ static int bnxt_get_registered_vfs(struct bnxt *bp) void bnxt_fw_reset(struct bnxt *bp) { bnxt_ulp_stop(bp); - bnxt_rtnl_lock_sp(bp); + bnxt_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state) && !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; @@ -14214,7 +14217,7 @@ void bnxt_fw_reset(struct bnxt *bp) bnxt_queue_fw_reset_work(bp, tmo); } fw_reset_exit: - bnxt_rtnl_unlock_sp(bp); + bnxt_unlock_sp(bp); } static void bnxt_chk_missed_irq(struct bnxt *bp) @@ -14413,7 +14416,7 @@ static void bnxt_sp_task(struct work_struct *work) static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, int *max_cp); -/* Under rtnl_lock */ +/* Under netdev instance lock */ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp) { @@ -14841,10 +14844,10 @@ static void bnxt_fw_reset_task(struct work_struct *work) return; } bp->fw_reset_timestamp = jiffies; - rtnl_lock(); + netdev_lock(bp->dev); if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { bnxt_fw_reset_abort(bp, rc); - rtnl_unlock(); + netdev_unlock(bp->dev); goto ulp_start; } bnxt_fw_reset_close(bp); @@ -14855,7 +14858,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV; tmo = bp->fw_reset_min_dsecs * HZ / 10; } - rtnl_unlock(); + netdev_unlock(bp->dev); bnxt_queue_fw_reset_work(bp, tmo); return; } @@ -14929,7 +14932,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; fallthrough; case BNXT_FW_RESET_STATE_OPENING: - while (!rtnl_trylock()) { + while (!netdev_trylock(bp->dev)) { bnxt_queue_fw_reset_work(bp, HZ / 10); return; } @@ -14937,7 +14940,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) if (rc) { netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); - rtnl_unlock(); + netdev_unlock(bp->dev); goto ulp_start; } @@ -14956,13 +14959,13 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_recovery_done(bp); bnxt_dl_health_fw_status_update(bp, true); } - rtnl_unlock(); + netdev_unlock(bp->dev); bnxt_ulp_start(bp, 0); bnxt_reenable_sriov(bp); - rtnl_lock(); + netdev_lock(bp->dev); bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); - rtnl_unlock(); + netdev_unlock(bp->dev); break; } return; @@ -14975,9 +14978,9 @@ fw_reset_abort_status: netdev_err(bp->dev, "fw_health_status 0x%x\n", sts); } fw_reset_abort: - rtnl_lock(); + netdev_lock(bp->dev); bnxt_fw_reset_abort(bp, rc); - rtnl_unlock(); + netdev_unlock(bp->dev); ulp_start: bnxt_ulp_start(bp, rc); } @@ -15069,13 +15072,14 @@ init_err: return rc; } -/* rtnl_lock held */ static int bnxt_change_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; struct bnxt *bp = netdev_priv(dev); int rc = 0; + netdev_assert_locked(dev); + if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -15096,11 +15100,12 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p) return rc; } -/* rtnl_lock held */ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) { struct bnxt *bp = netdev_priv(dev); + netdev_assert_locked(dev); + if (netif_running(dev)) bnxt_close_nic(bp, true, false); @@ -16257,7 +16262,7 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp) { int rc; - ASSERT_RTNL(); + netdev_ops_assert_locked(bp->dev); bnxt_hwrm_func_qcaps(bp); if (netif_running(bp->dev)) @@ -16657,7 +16662,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (!dev) return; - rtnl_lock(); + netdev_lock(dev); bp = netdev_priv(dev); if (!bp) goto shutdown_exit; @@ -16675,7 +16680,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) } shutdown_exit: - rtnl_unlock(); + netdev_unlock(dev); } #ifdef CONFIG_PM_SLEEP @@ -16687,7 +16692,7 @@ static int bnxt_suspend(struct device *device) bnxt_ulp_stop(bp); - rtnl_lock(); + netdev_lock(dev); if (netif_running(dev)) { netif_device_detach(dev); rc = bnxt_close(dev); @@ -16696,7 +16701,7 @@ static int bnxt_suspend(struct device *device) bnxt_ptp_clear(bp); pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp, false); - rtnl_unlock(); + netdev_unlock(dev); return rc; } @@ -16706,7 +16711,7 @@ static int bnxt_resume(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; - rtnl_lock(); + netdev_lock(dev); rc = pci_enable_device(bp->pdev); if (rc) { netdev_err(dev, "Cannot re-enable PCI device during resume, err = %d\n", @@ -16749,7 +16754,7 @@ static int bnxt_resume(struct device *device) } resume_exit: - rtnl_unlock(); + netdev_unlock(bp->dev); bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); @@ -16784,7 +16789,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, bnxt_ulp_stop(bp); - rtnl_lock(); + netdev_lock(netdev); netif_device_detach(netdev); if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { @@ -16793,7 +16798,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, } if (abort || state == pci_channel_io_perm_failure) { - rtnl_unlock(); + netdev_unlock(netdev); return PCI_ERS_RESULT_DISCONNECT; } @@ -16812,7 +16817,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (pci_is_enabled(pdev)) pci_disable_device(pdev); bnxt_free_ctx_mem(bp, false); - rtnl_unlock(); + netdev_unlock(netdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -16842,7 +16847,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) msleep(900); - rtnl_lock(); + netdev_lock(netdev); if (pci_enable_device(pdev)) { dev_err(&pdev->dev, @@ -16897,7 +16902,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) reset_exit: clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_clear_reservations(bp, true); - rtnl_unlock(); + netdev_unlock(netdev); return result; } @@ -16916,7 +16921,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) int err; netdev_info(bp->dev, "PCI Slot Resume\n"); - rtnl_lock(); + netdev_lock(netdev); err = bnxt_hwrm_func_qcaps(bp); if (!err) { @@ -16929,7 +16934,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) if (!err) netif_device_attach(netdev); - rtnl_unlock(); + netdev_unlock(netdev); bnxt_ulp_start(bp, err); if (!err) bnxt_reenable_sriov(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index ef8288fd68f4..b06fcddfc81c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -439,14 +439,17 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: { bnxt_ulp_stop(bp); rtnl_lock(); + netdev_lock(bp->dev); if (bnxt_sriov_cfg(bp)) { NL_SET_ERR_MSG_MOD(extack, "reload is unsupported while VFs are allocated or being configured"); + netdev_unlock(bp->dev); rtnl_unlock(); bnxt_ulp_start(bp, 0); return -EOPNOTSUPP; } if (bp->dev->reg_state == NETREG_UNREGISTERED) { + netdev_unlock(bp->dev); rtnl_unlock(); bnxt_ulp_start(bp, 0); return -ENODEV; @@ -459,6 +462,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, NL_SET_ERR_MSG_MOD(extack, "Failed to deregister"); if (netif_running(bp->dev)) dev_close(bp->dev); + netdev_unlock(bp->dev); rtnl_unlock(); break; } @@ -479,7 +483,9 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, return -EPERM; } rtnl_lock(); + netdev_lock(bp->dev); if (bp->dev->reg_state == NETREG_UNREGISTERED) { + netdev_unlock(bp->dev); rtnl_unlock(); return -ENODEV; } @@ -493,6 +499,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to activate firmware"); clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state); + netdev_unlock(bp->dev); rtnl_unlock(); } break; @@ -568,7 +575,9 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti } *actions_performed |= BIT(action); } else if (netif_running(bp->dev)) { + netdev_lock(bp->dev); dev_close(bp->dev); + netdev_unlock(bp->dev); } rtnl_unlock(); if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 12b6ed51fd88..5ddddd89052f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -946,7 +946,9 @@ void bnxt_sriov_disable(struct bnxt *bp) /* Reclaim all resources for the PF. */ rtnl_lock(); + netdev_lock(bp->dev); bnxt_restore_pf_fw_resources(bp); + netdev_unlock(bp->dev); rtnl_unlock(); } @@ -956,17 +958,21 @@ int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs) struct bnxt *bp = netdev_priv(dev); rtnl_lock(); + netdev_lock(dev); if (!netif_running(dev)) { netdev_warn(dev, "Reject SRIOV config request since if is down!\n"); + netdev_unlock(dev); rtnl_unlock(); return 0; } if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { netdev_warn(dev, "Reject SRIOV config request when FW reset is in progress\n"); + netdev_unlock(dev); rtnl_unlock(); return 0; } bp->sriov_cfg = true; + netdev_unlock(dev); rtnl_unlock(); if (pci_vfs_assigned(bp->pdev)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index e4a7f37036ed..a8e930d5dbb0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -112,7 +112,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, struct bnxt_ulp *ulp; int rc = 0; - rtnl_lock(); + netdev_lock(dev); mutex_lock(&edev->en_dev_lock); if (!bp->irq_tbl) { rc = -ENODEV; @@ -138,7 +138,7 @@ int bnxt_register_dev(struct bnxt_en_dev *edev, edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; exit: mutex_unlock(&edev->en_dev_lock); - rtnl_unlock(); + netdev_unlock(dev); return rc; } EXPORT_SYMBOL(bnxt_register_dev); @@ -151,7 +151,7 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) int i = 0; ulp = edev->ulp_tbl; - rtnl_lock(); + netdev_lock(dev); mutex_lock(&edev->en_dev_lock); if (ulp->msix_requested) edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; @@ -169,7 +169,7 @@ void bnxt_unregister_dev(struct bnxt_en_dev *edev) i++; } mutex_unlock(&edev->en_dev_lock); - rtnl_unlock(); + netdev_unlock(dev); return; } EXPORT_SYMBOL(bnxt_unregister_dev); @@ -309,12 +309,14 @@ void bnxt_ulp_irq_stop(struct bnxt *bp) if (!ulp->msix_requested) return; - ops = rtnl_dereference(ulp->ulp_ops); + netdev_lock(bp->dev); + ops = rcu_dereference(ulp->ulp_ops); if (!ops || !ops->ulp_irq_stop) return; if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) reset = true; ops->ulp_irq_stop(ulp->handle, reset); + netdev_unlock(bp->dev); } } @@ -333,7 +335,8 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) if (!ulp->msix_requested) return; - ops = rtnl_dereference(ulp->ulp_ops); + netdev_lock(bp->dev); + ops = rcu_dereference(ulp->ulp_ops); if (!ops || !ops->ulp_irq_restart) return; @@ -345,6 +348,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) bnxt_fill_msix_vecs(bp, ent); } ops->ulp_irq_restart(ulp->handle, ent); + netdev_unlock(bp->dev); kfree(ent); } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 1467b94a6427..619f0844e778 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -257,8 +257,7 @@ bool bnxt_dev_is_vf_rep(struct net_device *dev) /* Called when the parent PF interface is closed: * As the mode transition from SWITCHDEV to LEGACY - * happens under the rtnl_lock() this routine is safe - * under the rtnl_lock() + * happens under the netdev instance lock this routine is safe */ void bnxt_vf_reps_close(struct bnxt *bp) { @@ -278,8 +277,7 @@ void bnxt_vf_reps_close(struct bnxt *bp) /* Called when the parent PF interface is opened (re-opened): * As the mode transition from SWITCHDEV to LEGACY - * happen under the rtnl_lock() this routine is safe - * under the rtnl_lock() + * happen under the netdev instance lock this routine is safe */ void bnxt_vf_reps_open(struct bnxt *bp) { @@ -348,7 +346,7 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) /* Ensure that parent PF's and VF-reps' RX/TX has been quiesced * before proceeding with VF-rep cleanup. */ - rtnl_lock(); + netdev_lock(bp->dev); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); closed = true; @@ -365,10 +363,10 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) bnxt_open_nic(bp, false, false); bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; } - rtnl_unlock(); + netdev_unlock(bp->dev); - /* Need to call vf_reps_destroy() outside of rntl_lock - * as unregister_netdev takes rtnl_lock + /* Need to call vf_reps_destroy() outside of netdev instance lock + * as unregister_netdev takes it */ __bnxt_vf_reps_destroy(bp); } @@ -376,7 +374,7 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) /* Free the VF-Reps in firmware, during firmware hot-reset processing. * Note that the VF-Rep netdevs are still active (not unregistered) during * this process. As the mode transition from SWITCHDEV to LEGACY happens - * under the rtnl_lock() this routine is safe under the rtnl_lock(). + * under the netdev instance lock this routine is safe. */ void bnxt_vf_reps_free(struct bnxt *bp) { @@ -413,7 +411,7 @@ static int bnxt_alloc_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, /* Allocate the VF-Reps in firmware, during firmware hot-reset processing. * Note that the VF-Rep netdevs are still active (not unregistered) during * this process. As the mode transition from SWITCHDEV to LEGACY happens - * under the rtnl_lock() this routine is safe under the rtnl_lock(). + * under the netdev instance lock this routine is safe. */ int bnxt_vf_reps_alloc(struct bnxt *bp) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index e6c64e4bd66c..0caf6e9bccb8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -382,13 +382,14 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, return nxmit; } -/* Under rtnl_lock */ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { struct net_device *dev = bp->dev; int tx_xdp = 0, tx_cp, rc, tc; struct bpf_prog *old; + netdev_assert_locked(dev); + if (prog && !prog->aux->xdp_has_frags && bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { netdev_warn(dev, "MTU %d larger than %d without XDP frag support.\n", diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 029be8342b7b..6dcdcaf518f4 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1968,7 +1968,7 @@ static void gve_turndown(struct gve_priv *priv) netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_TX, NULL); - napi_disable(&block->napi); + napi_disable_locked(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -1979,7 +1979,7 @@ static void gve_turndown(struct gve_priv *priv) netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, NULL); - napi_disable(&block->napi); + napi_disable_locked(&block->napi); } /* Stop tx queues */ @@ -2009,7 +2009,7 @@ static void gve_turnup(struct gve_priv *priv) if (!gve_tx_was_added_to_block(priv, idx)) continue; - napi_enable(&block->napi); + napi_enable_locked(&block->napi); if (idx < priv->tx_cfg.num_queues) netif_queue_set_napi(priv->dev, idx, @@ -2037,7 +2037,7 @@ static void gve_turnup(struct gve_priv *priv) if (!gve_rx_was_added_to_block(priv, idx)) continue; - napi_enable(&block->napi); + napi_enable_locked(&block->napi); netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, &block->napi); @@ -2887,6 +2887,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) priv->suspend_cnt++; rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2895,6 +2896,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) gve_teardown_priv_resources(priv); } priv->up_before_suspend = was_up; + netdev_unlock(netdev); rtnl_unlock(); return 0; } @@ -2907,7 +2909,9 @@ static int gve_resume(struct pci_dev *pdev) priv->resume_cnt++; rtnl_lock(); + netdev_lock(netdev); err = gve_reset_recovery(priv, priv->up_before_suspend); + netdev_unlock(netdev); rtnl_unlock(); return err; } diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 30fef100257e..ace9b8698021 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -110,13 +110,13 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx, { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_add(priv->dev, &block->napi, gve_poll); - netif_napi_set_irq(&block->napi, block->irq); + netif_napi_add_locked(priv->dev, &block->napi, gve_poll); + netif_napi_set_irq_locked(&block->napi, block->irq); } void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_del(&block->napi); + netif_napi_del_locked(&block->napi); } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 71f11f64b13d..032e1a58af6f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3894,10 +3894,8 @@ exit: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return 0; - netdev_lock(netdev); netif_set_real_num_rx_queues(netdev, total_qps); netif_set_real_num_tx_queues(netdev, total_qps); - netdev_unlock(netdev); return ret; } @@ -4562,22 +4560,21 @@ static int iavf_open(struct net_device *netdev) struct iavf_adapter *adapter = netdev_priv(netdev); int err; + netdev_assert_locked(netdev); + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) { dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n"); return -EIO; } - netdev_lock(netdev); while (!mutex_trylock(&adapter->crit_lock)) { /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock * is already taken and iavf_open is called from an upper * device's notifier reacting on NETDEV_REGISTER event. * We have to leave here to avoid dead lock. */ - if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) { - netdev_unlock(netdev); + if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) return -EBUSY; - } usleep_range(500, 1000); } @@ -4626,7 +4623,6 @@ static int iavf_open(struct net_device *netdev) iavf_irq_enable(adapter, true); mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); return 0; @@ -4639,7 +4635,6 @@ err_setup_tx: iavf_free_all_tx_resources(adapter); err_unlock: mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); return err; } @@ -4661,12 +4656,12 @@ static int iavf_close(struct net_device *netdev) u64 aq_to_restore; int status; - netdev_lock(netdev); + netdev_assert_locked(netdev); + mutex_lock(&adapter->crit_lock); if (adapter->state <= __IAVF_DOWN_PENDING) { mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); return 0; } @@ -4719,6 +4714,7 @@ static int iavf_close(struct net_device *netdev) if (!status) netdev_warn(netdev, "Device resources not yet released\n"); + netdev_lock(netdev); mutex_lock(&adapter->crit_lock); adapter->aq_required |= aq_to_restore; mutex_unlock(&adapter->crit_lock); diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 7ab358616e03..4d191a3293c7 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -107,10 +107,8 @@ nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch) struct netdevsim *ns = netdev_priv(dev); int err; - netdev_lock(dev); err = netif_set_real_num_queues(dev, ch->combined_count, ch->combined_count); - netdev_unlock(dev); if (err) return err; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a41dc79e9c2e..54d03b0628d2 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -402,7 +402,7 @@ static int nsim_init_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { rq = ns->rq[i]; - netif_napi_add_config(dev, &rq->napi, nsim_poll, i); + netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i); } for (i = 0; i < dev->num_rx_queues; i++) { @@ -422,7 +422,7 @@ err_pp_destroy: } for (i = 0; i < dev->num_rx_queues; i++) - __netif_napi_del(&ns->rq[i]->napi); + __netif_napi_del_locked(&ns->rq[i]->napi); return err; } @@ -452,7 +452,7 @@ static void nsim_enable_napi(struct netdevsim *ns) struct nsim_rq *rq = ns->rq[i]; netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi); - napi_enable(&rq->napi); + napi_enable_locked(&rq->napi); } } @@ -461,6 +461,8 @@ static int nsim_open(struct net_device *dev) struct netdevsim *ns = netdev_priv(dev); int err; + netdev_assert_locked(dev); + err = nsim_init_napi(ns); if (err) return err; @@ -478,8 +480,8 @@ static void nsim_del_napi(struct netdevsim *ns) for (i = 0; i < dev->num_rx_queues; i++) { struct nsim_rq *rq = ns->rq[i]; - napi_disable(&rq->napi); - __netif_napi_del(&rq->napi); + napi_disable_locked(&rq->napi); + __netif_napi_del_locked(&rq->napi); } synchronize_net(); @@ -494,6 +496,8 @@ static int nsim_stop(struct net_device *dev) struct netdevsim *ns = netdev_priv(dev); struct netdevsim *peer; + netdev_assert_locked(dev); + netif_carrier_off(dev); peer = rtnl_dereference(ns->peer); if (peer) @@ -679,7 +683,8 @@ nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx) goto err_free; if (!ns->rq_reset_mode) - netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); return 0; @@ -696,7 +701,7 @@ static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem) page_pool_destroy(qmem->pp); if (qmem->rq) { if (!ns->rq_reset_mode) - netif_napi_del(&qmem->rq->napi); + netif_napi_del_locked(&qmem->rq->napi); page_pool_destroy(qmem->rq->page_pool); nsim_queue_free(qmem->rq); } @@ -708,9 +713,11 @@ nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); + netdev_assert_locked(dev); + if (ns->rq_reset_mode == 1) { ns->rq[idx]->page_pool = qmem->pp; - napi_enable(&ns->rq[idx]->napi); + napi_enable_locked(&ns->rq[idx]->napi); return 0; } @@ -718,15 +725,17 @@ nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx) * here we want to test various call orders. */ if (ns->rq_reset_mode == 2) { - netif_napi_del(&ns->rq[idx]->napi); - netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); + netif_napi_del_locked(&ns->rq[idx]->napi); + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); } else if (ns->rq_reset_mode == 3) { - netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx); - netif_napi_del(&ns->rq[idx]->napi); + netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll, + idx); + netif_napi_del_locked(&ns->rq[idx]->napi); } ns->rq[idx] = qmem->rq; - napi_enable(&ns->rq[idx]->napi); + napi_enable_locked(&ns->rq[idx]->napi); return 0; } @@ -736,7 +745,9 @@ static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx) struct nsim_queue_mem *qmem = per_queue_mem; struct netdevsim *ns = netdev_priv(dev); - napi_disable(&ns->rq[idx]->napi); + netdev_assert_locked(dev); + + napi_disable_locked(&ns->rq[idx]->napi); if (ns->rq_reset_mode == 1) { qmem->pp = ns->rq[idx]->page_pool; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index d4ece538f1b2..4382f5e323b0 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1017,7 +1017,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return -ENOLINK; } - ret = dev_set_mac_address_user(tap->dev, &sa, NULL); + ret = dev_set_mac_address(tap->dev, &sa, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d8f4d3e996a7..1e645d5e225c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3175,7 +3175,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCSIFHWADDR: /* Set hw address */ - ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); + ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr, NULL); break; case TUNGETSNDBUF: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ab86ec228b7..d206c9592b60 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2486,19 +2486,29 @@ struct net_device { bool up; /** + * @request_ops_lock: request the core to run all @netdev_ops and + * @ethtool_ops under the @lock. + */ + bool request_ops_lock; + + /** * @lock: netdev-scope lock, protects a small selection of fields. * Should always be taken using netdev_lock() / netdev_unlock() helpers. * Drivers are free to use it for other protection. * * Protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, - * @net_shaper_hierarchy, @reg_state, @threaded + * @net_shaper_hierarchy, @reg_state, @threaded, @dev_addr * * Partially protects (writers must hold both @lock and rtnl_lock): * @up * * Also protects some fields in struct napi_struct. * + * For the drivers that implement shaper or queue API, the scope + * of this lock is expanded to cover most ndo/queue/ethtool/sysfs + * operations. + * * Ordering: take after rtnl_lock. */ struct mutex lock; @@ -2620,16 +2630,35 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } +static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, + const struct lockdep_map *b) +{ + /* Only lower devices currently grab the instance lock, so no + * real ordering issues can occur. In the near future, only + * hardware devices will grab instance lock which also does not + * involve any ordering. Suppress lockdep ordering warnings + * until (if) we start grabbing instance lock on pure SW + * devices (bond/team/veth/etc). + */ + if (a == b) + return 0; + return -1; +} + #define netdev_lockdep_set_classes(dev) \ { \ static struct lock_class_key qdisc_tx_busylock_key; \ static struct lock_class_key qdisc_xmit_lock_key; \ static struct lock_class_key dev_addr_list_lock_key; \ + static struct lock_class_key dev_instance_lock_key; \ unsigned int i; \ \ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ lockdep_set_class(&(dev)->addr_list_lock, \ &dev_addr_list_lock_key); \ + lockdep_set_class(&(dev)->lock, \ + &dev_instance_lock_key); \ + lock_set_cmp_fn(&dev->lock, netdev_lock_cmp_fn, NULL); \ for (i = 0; i < (dev)->num_tx_queues; i++) \ lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ &qdisc_xmit_lock_key); \ @@ -2736,6 +2765,11 @@ static inline void netdev_lock(struct net_device *dev) mutex_lock(&dev->lock); } +static inline bool netdev_trylock(struct net_device *dev) +{ + return mutex_trylock(&dev->lock); +} + static inline void netdev_unlock(struct net_device *dev) { mutex_unlock(&dev->lock); @@ -2753,6 +2787,35 @@ static inline void netdev_assert_locked_or_invisible(struct net_device *dev) netdev_assert_locked(dev); } +static inline bool netdev_need_ops_lock(struct net_device *dev) +{ + bool ret = dev->request_ops_lock || !!dev->queue_mgmt_ops; + +#if IS_ENABLED(CONFIG_NET_SHAPER) + ret |= !!dev->netdev_ops->net_shaper_ops; +#endif + + return ret; +} + +static inline void netdev_lock_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_lock(dev); +} + +static inline void netdev_unlock_ops(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); +} + +static inline void netdev_ops_assert_locked(struct net_device *dev) +{ + if (netdev_need_ops_lock(dev)) + lockdep_assert_held(&dev->lock); +} + void netif_napi_set_irq_locked(struct napi_struct *napi, int irq); static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) @@ -3327,9 +3390,14 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); +int netif_open(struct net_device *dev, struct netlink_ext_ack *extack); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); +void netif_close(struct net_device *dev); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); +void netif_disable_lro(struct net_device *dev); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, @@ -4177,6 +4245,8 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd); int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg); int generic_hwtstamp_set_lower(struct net_device *dev, @@ -4186,27 +4256,27 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); -int __dev_change_net_namespace(struct net_device *dev, struct net *net, +int netif_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack); -static inline int dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat) -{ - return __dev_change_net_namespace(dev, net, pat, 0, NULL); -} + const char *pat); int __dev_set_mtu(struct net_device *, int); +int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); +int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); @@ -4218,6 +4288,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u8 dev_xdp_sb_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); @@ -4960,6 +5031,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); +int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 1a4fec330eaa..a10153c3be2d 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -528,10 +528,10 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&offmap->map, attr); - rtnl_lock(); - down_write(&bpf_devs_lock); offmap->netdev = __dev_get_by_index(net, attr->map_ifindex); + netdev_lock_ops(offmap->netdev); + down_write(&bpf_devs_lock); err = bpf_dev_offload_check(offmap->netdev); if (err) goto err_unlock; @@ -548,12 +548,14 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) list_add_tail(&offmap->offloads, &ondev->maps); up_write(&bpf_devs_lock); + netdev_unlock_ops(offmap->netdev); rtnl_unlock(); return &offmap->map; err_unlock: up_write(&bpf_devs_lock); + netdev_unlock_ops(offmap->netdev); rtnl_unlock(); bpf_map_area_free(offmap); return ERR_PTR(err); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 91d134961357..ee3283400716 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -377,7 +377,6 @@ static int vlan_hwtstamp_set(struct net_device *dev, static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - const struct net_device_ops *ops = real_dev->netdev_ops; struct ifreq ifrr; int err = -EOPNOTSUPP; @@ -388,8 +387,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) - err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); + err = dev_eth_ioctl(real_dev, &ifrr, cmd); break; } diff --git a/net/core/Makefile b/net/core/Makefile index d9326600e289..a10c3bd96798 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ +obj-y += dev.o dev_api.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o gro.o \ diff --git a/net/core/dev.c b/net/core/dev.c index 2dc705604509..a0f75a1d1f5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1058,6 +1058,28 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex) return __netdev_put_lock(dev); } +/** + * netdev_get_by_name_lock() - find a device by its name + * @net: the applicable net namespace + * @name: name of device + * + * Search for an interface by name. If a valid device + * with @name is found it will be returned with netdev->lock held. + * netdev_unlock() must be called to release it. + * + * Return: pointer to a device with lock held, NULL if not found. + */ +struct net_device *netdev_get_by_name_lock(struct net *net, const char *name) +{ + struct net_device *dev; + + dev = dev_get_by_name(net, name); + if (!dev) + return NULL; + + return __netdev_put_lock(dev); +} + struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index) @@ -1371,15 +1393,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev, return ret < 0 ? ret : 0; } -/** - * dev_change_name - change name of a device - * @dev: device - * @newname: name (or format string) must be at least IFNAMSIZ - * - * Change name of a device, can pass format strings "eth%d". - * for wildcarding. - */ -int dev_change_name(struct net_device *dev, const char *newname) +int netif_change_name(struct net_device *dev, const char *newname) { struct net *net = dev_net(dev); unsigned char old_assign_type; @@ -1449,15 +1463,7 @@ rollback: return err; } -/** - * dev_set_alias - change ifalias of a device - * @dev: device - * @alias: name up to IFALIASZ - * @len: limit of bytes to copy from info - * - * Set ifalias for a device, - */ -int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +int netif_set_alias(struct net_device *dev, const char *alias, size_t len) { struct dev_ifalias *new_alias = NULL; @@ -1483,7 +1489,6 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return len; } -EXPORT_SYMBOL(dev_set_alias); /** * dev_get_alias - get ifalias of a device @@ -1629,6 +1634,8 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) set_bit(__LINK_STATE_START, &dev->state); + netdev_ops_assert_locked(dev); + if (ops->ndo_validate_addr) ret = ops->ndo_validate_addr(dev); @@ -1649,20 +1656,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } -/** - * dev_open - prepare an interface for use. - * @dev: device to open - * @extack: netlink extended ack - * - * Takes a device from down to up state. The device's private open - * function is invoked and then the multicast lists are loaded. Finally - * the device is moved into the up state and a %NETDEV_UP message is - * sent to the netdev notifier chain. - * - * Calling this function on an active interface is a nop. On a failure - * a negative errno code is returned. - */ -int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) +int netif_open(struct net_device *dev, struct netlink_ext_ack *extack) { int ret; @@ -1678,7 +1672,6 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } -EXPORT_SYMBOL(dev_open); static void __dev_close_many(struct list_head *head) { @@ -1716,6 +1709,9 @@ static void __dev_close_many(struct list_head *head) * We allow it to be called even after a DETACH hot-plug * event. */ + + netdev_ops_assert_locked(dev); + if (ops->ndo_stop) ops->ndo_stop(dev); @@ -1753,16 +1749,7 @@ void dev_close_many(struct list_head *head, bool unlink) } EXPORT_SYMBOL(dev_close_many); -/** - * dev_close - shutdown an interface. - * @dev: device to shutdown - * - * This function moves an active device into down state. A - * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device - * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier - * chain. - */ -void dev_close(struct net_device *dev) +void netif_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); @@ -1772,18 +1759,27 @@ void dev_close(struct net_device *dev) list_del(&single); } } -EXPORT_SYMBOL(dev_close); +int dev_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int ret; -/** - * dev_disable_lro - disable Large Receive Offload on a device - * @dev: device - * - * Disable Large Receive Offload (LRO) on a net device. Must be - * called under RTNL. This is needed if received packets may be - * forwarded to another interface. - */ -void dev_disable_lro(struct net_device *dev) + ASSERT_RTNL(); + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + netdev_lock_ops(dev); + ret = ops->ndo_setup_tc(dev, type, type_data); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_setup_tc); + +void netif_disable_lro(struct net_device *dev) { struct net_device *lower_dev; struct list_head *iter; @@ -1794,10 +1790,12 @@ void dev_disable_lro(struct net_device *dev) if (unlikely(dev->features & NETIF_F_LRO)) netdev_WARN(dev, "failed to disable LRO!\n"); - netdev_for_each_lower_dev(dev, lower_dev, iter) - dev_disable_lro(lower_dev); + netdev_for_each_lower_dev(dev, lower_dev, iter) { + netdev_lock_ops(lower_dev); + netif_disable_lro(lower_dev); + netdev_unlock_ops(lower_dev); + } } -EXPORT_SYMBOL(dev_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device @@ -6056,7 +6054,7 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) static_branch_dec(&generic_xdp_needed_key); } else if (new && !old) { static_branch_inc(&generic_xdp_needed_key); - dev_disable_lro(dev); + netif_disable_lro(dev); dev_disable_gro_hw(dev); } break; @@ -9228,7 +9226,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) } EXPORT_SYMBOL(dev_set_promiscuity); -static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) +int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags, old_gflags = dev->gflags; unsigned int allmulti, flags; @@ -9263,25 +9261,6 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. - */ - -int dev_set_allmulti(struct net_device *dev, int inc) -{ - return __dev_set_allmulti(dev, inc, true); -} -EXPORT_SYMBOL(dev_set_allmulti); - /* * Upload unicast and multicast address lists to device and * configure RX filtering. When the device doesn't support unicast @@ -9414,7 +9393,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, int inc = (flags & IFF_ALLMULTI) ? 1 : -1; dev->gflags ^= IFF_ALLMULTI; - __dev_set_allmulti(dev, inc, false); + netif_set_allmulti(dev, inc, false); } return ret; @@ -9449,17 +9428,8 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, } } -/** - * dev_change_flags - change device settings - * @dev: device - * @flags: device state flags - * @extack: netlink extended ack - * - * Change settings on device based state flags. The flags are - * in the userspace exported format. - */ -int dev_change_flags(struct net_device *dev, unsigned int flags, - struct netlink_ext_ack *extack) +int netif_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) { int ret; unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; @@ -9472,7 +9442,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, __dev_notify_flags(dev, old_flags, changes, 0, NULL); return ret; } -EXPORT_SYMBOL(dev_change_flags); int __dev_set_mtu(struct net_device *dev, int new_mtu) { @@ -9504,15 +9473,15 @@ int dev_validate_mtu(struct net_device *dev, int new_mtu, } /** - * dev_set_mtu_ext - Change maximum transfer unit + * netif_set_mtu_ext - Change maximum transfer unit * @dev: device * @new_mtu: new transfer unit * @extack: netlink extended ack * * Change the maximum transfer size of the network device. */ -int dev_set_mtu_ext(struct net_device *dev, int new_mtu, - struct netlink_ext_ack *extack) +int netif_set_mtu_ext(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack) { int err, orig_mtu; @@ -9550,25 +9519,20 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, return err; } -int dev_set_mtu(struct net_device *dev, int new_mtu) +int netif_set_mtu(struct net_device *dev, int new_mtu) { struct netlink_ext_ack extack; int err; memset(&extack, 0, sizeof(extack)); - err = dev_set_mtu_ext(dev, new_mtu, &extack); + err = netif_set_mtu_ext(dev, new_mtu, &extack); if (err && extack._msg) net_err_ratelimited("%s: %s\n", dev->name, extack._msg); return err; } -EXPORT_SYMBOL(dev_set_mtu); +EXPORT_SYMBOL(netif_set_mtu); -/** - * dev_change_tx_queue_len - Change TX queue length of a netdevice - * @dev: device - * @new_len: new tx queue length - */ -int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) +int netif_change_tx_queue_len(struct net_device *dev, unsigned long new_len) { unsigned int orig_len = dev->tx_queue_len; int res; @@ -9595,12 +9559,7 @@ err_rollback: return res; } -/** - * dev_set_group - Change group this device belongs to - * @dev: device - * @new_group: group this device should belong to - */ -void dev_set_group(struct net_device *dev, int new_group) +void netif_set_group(struct net_device *dev, int new_group) { dev->group = new_group; } @@ -9626,16 +9585,8 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, } EXPORT_SYMBOL(dev_pre_changeaddr_notify); -/** - * dev_set_mac_address - Change Media Access Control Address - * @dev: device - * @sa: new address - * @extack: netlink extended ack - * - * Change the hardware (MAC) address of the device - */ -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) +int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; int err; @@ -9659,58 +9610,29 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, add_device_randomness(dev->dev_addr, dev->addr_len); return 0; } -EXPORT_SYMBOL(dev_set_mac_address); - -DECLARE_RWSEM(dev_addr_sem); - -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, - struct netlink_ext_ack *extack) -{ - int ret; - - down_write(&dev_addr_sem); - ret = dev_set_mac_address(dev, sa, extack); - up_write(&dev_addr_sem); - return ret; -} -EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { size_t size = sizeof(sa->sa_data_min); struct net_device *dev; - int ret = 0; - down_read(&dev_addr_sem); - rcu_read_lock(); + dev = netdev_get_by_name_lock(net, dev_name); + if (!dev) + return -ENODEV; - dev = dev_get_by_name_rcu(net, dev_name); - if (!dev) { - ret = -ENODEV; - goto unlock; - } if (!dev->addr_len) memset(sa->sa_data, 0, size); else memcpy(sa->sa_data, dev->dev_addr, min_t(size_t, size, dev->addr_len)); sa->sa_family = dev->type; + netdev_unlock(dev); -unlock: - rcu_read_unlock(); - up_read(&dev_addr_sem); - return ret; + return 0; } EXPORT_SYMBOL(dev_get_mac_address); -/** - * dev_change_carrier - Change device carrier - * @dev: device - * @new_carrier: new value - * - * Change device carrier - */ -int dev_change_carrier(struct net_device *dev, bool new_carrier) +int netif_change_carrier(struct net_device *dev, bool new_carrier) { const struct net_device_ops *ops = dev->netdev_ops; @@ -9821,13 +9743,7 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); -/** - * dev_change_proto_down - set carrier according to proto_down. - * - * @dev: device - * @proto_down: new value - */ -int dev_change_proto_down(struct net_device *dev, bool proto_down) +int netif_change_proto_down(struct net_device *dev, bool proto_down) { if (!dev->change_proto_down) return -EOPNOTSUPP; @@ -9842,14 +9758,14 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } /** - * dev_change_proto_down_reason - proto down reason + * netdev_change_proto_down_reason_locked - proto down reason * * @dev: device * @mask: proto down mask * @value: proto down value */ -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value) +void netdev_change_proto_down_reason_locked(struct net_device *dev, + unsigned long mask, u32 value) { u32 proto_down_reason; int b; @@ -9938,7 +9854,7 @@ u8 dev_xdp_sb_prog_count(struct net_device *dev) return count; } -int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) +int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) { if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; @@ -9958,7 +9874,6 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) return dev->netdev_ops->ndo_bpf(dev, bpf); } -EXPORT_SYMBOL_GPL(dev_xdp_propagate); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { @@ -9988,6 +9903,8 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; + netdev_ops_assert_locked(dev); + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && prog && !prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); @@ -10220,7 +10137,9 @@ static void bpf_xdp_link_release(struct bpf_link *link) * already NULL, in which case link was already auto-detached */ if (xdp_link->dev) { + netdev_lock_ops(xdp_link->dev); WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + netdev_unlock_ops(xdp_link->dev); xdp_link->dev = NULL; } @@ -10302,10 +10221,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } + netdev_lock_ops(xdp_link->dev); mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, xdp_link->flags, new_prog); + netdev_unlock_ops(xdp_link->dev); if (err) goto out_unlock; @@ -10657,6 +10578,7 @@ int __netdev_update_features(struct net_device *dev) int err = -1; ASSERT_RTNL(); + netdev_ops_assert_locked(dev); features = netdev_get_wanted_features(dev); @@ -11090,7 +11012,9 @@ int register_netdevice(struct net_device *dev) if (ret) goto err_uninit_notify; + netdev_lock_ops(dev); __netdev_update_features(dev); + netdev_unlock_ops(dev); /* * Default initial state at registry is that the @@ -12006,11 +11930,14 @@ void unregister_netdevice_many_notify(struct list_head *head, } /* If device is running, close it first. */ - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry(dev, head, unreg_list) { list_add_tail(&dev->close_list, &close_head); + netdev_lock_ops(dev); + } dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { + netdev_unlock_ops(dev); /* And unlink it from device chain. */ unlist_netdevice(dev); netdev_lock(dev); @@ -12027,7 +11954,9 @@ void unregister_netdevice_many_notify(struct list_head *head, /* Shutdown queueing discipline. */ dev_shutdown(dev); dev_tcx_uninstall(dev); + netdev_lock_ops(dev); dev_xdp_uninstall(dev); + netdev_unlock_ops(dev); bpf_dev_bound_netdev_unregister(dev); dev_memory_provider_uninstall(dev); @@ -12123,24 +12052,7 @@ void unregister_netdev(struct net_device *dev) } EXPORT_SYMBOL(unregister_netdev); -/** - * __dev_change_net_namespace - move device to different nethost namespace - * @dev: device - * @net: network namespace - * @pat: If not NULL name pattern to try if the current device name - * is already taken in the destination network namespace. - * @new_ifindex: If not zero, specifies device index in the target - * namespace. - * @extack: netlink extended ack - * - * This function shuts down a device interface and moves it - * to a new network namespace. On success 0 is returned, on - * a failure a netagive errno code is returned. - * - * Callers must hold the rtnl semaphore. - */ - -int __dev_change_net_namespace(struct net_device *dev, struct net *net, +int netif_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack) { @@ -12226,7 +12138,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, */ /* If device is running close it first. */ - dev_close(dev); + netif_close(dev); /* And unlink it from device chain */ unlist_netdevice(dev); @@ -12308,7 +12220,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, out: return err; } -EXPORT_SYMBOL_GPL(__dev_change_net_namespace); static int dev_cpu_dead(unsigned int oldcpu) { diff --git a/net/core/dev.h b/net/core/dev.h index caa13e431a6b..b50ca645c086 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -28,6 +28,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); +struct net_device *netdev_get_by_name_lock(struct net *net, const char *name); struct net_device *__netdev_put_lock(struct net_device *dev); struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, @@ -69,8 +70,6 @@ extern int weight_p; extern int dev_weight_rx_bias; extern int dev_weight_tx_bias; -extern struct rw_semaphore dev_addr_sem; - /* rtnl helpers */ extern struct list_head net_todo_list; void netdev_run_todo(void); @@ -85,6 +84,7 @@ struct netdev_name_node { }; int netdev_get_name(struct net *net, char *name, int ifindex); +int netif_change_name(struct net_device *dev, const char *newname); int dev_change_name(struct net_device *dev, const char *newname); #define netdev_for_each_altname(dev, namenode) \ @@ -98,24 +98,28 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); int dev_validate_mtu(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); -int dev_set_mtu_ext(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); +int netif_set_mtu_ext(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int netif_change_proto_down(struct net_device *dev, bool proto_down); int dev_change_proto_down(struct net_device *dev, bool proto_down); -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value); +void netdev_change_proto_down_reason_locked(struct net_device *dev, + unsigned long mask, u32 value); typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); +int netif_change_tx_queue_len(struct net_device *dev, unsigned long new_len); int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len); +void netif_set_group(struct net_device *dev, int new_group); void dev_set_group(struct net_device *dev, int new_group); +int netif_change_carrier(struct net_device *dev, bool new_carrier); int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); @@ -134,9 +138,11 @@ static inline void netif_set_up(struct net_device *dev, bool value) else dev->flags &= ~IFF_UP; - netdev_lock(dev); + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); dev->up = value; - netdev_unlock(dev); + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); } static inline void netif_set_gso_max_size(struct net_device *dev, diff --git a/net/core/dev_api.c b/net/core/dev_api.c new file mode 100644 index 000000000000..655a95fb7baa --- /dev/null +++ b/net/core/dev_api.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/netdevice.h> + +#include "dev.h" + +/** + * dev_change_name() - change name of a device + * @dev: device + * @newname: name (or format string) must be at least IFNAMSIZ + * + * Change name of a device, can pass format strings "eth%d". + * for wildcarding. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_name(struct net_device *dev, const char *newname) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_name(dev, newname); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_set_alias() - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info + * + * Set ifalias for a device. + * + * Return: 0 on success, -errno on failure. + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_alias(dev, alias, len); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_alias); + +/** + * dev_change_flags() - change device settings + * @dev: device + * @flags: device state flags + * @extack: netlink extended ack + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_flags(dev, flags, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_change_flags); + +/** + * dev_set_group() - change group this device belongs to + * @dev: device + * @new_group: group this device should belong to + */ +void dev_set_group(struct net_device *dev, int new_group) +{ + netdev_lock_ops(dev); + netif_set_group(dev, new_group); + netdev_unlock_ops(dev); +} + +/** + * dev_change_net_namespace() - move device to different nethost namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * a failure a netagive errno code is returned. + * + * Callers must hold the rtnl semaphore. + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_net_namespace(dev, net, pat, 0, NULL); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_change_net_namespace); + +/** + * dev_change_carrier() - change device carrier + * @dev: device + * @new_carrier: new value + * + * Change device carrier + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_carrier(struct net_device *dev, bool new_carrier) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_carrier(dev, new_carrier); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_change_tx_queue_len() - change TX queue length of a netdevice + * @dev: device + * @new_len: new tx queue length + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_tx_queue_len(dev, new_len); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_change_proto_down() - set carrier according to proto_down + * @dev: device + * @proto_down: new value + * + * Return: 0 on success, -errno on failure. + */ +int dev_change_proto_down(struct net_device *dev, bool proto_down) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_change_proto_down(dev, proto_down); + netdev_unlock_ops(dev); + + return ret; +} + +/** + * dev_open() - prepare an interface for use + * @dev: device to open + * @extack: netlink extended ack + * + * Takes a device from down to up state. The device's private open + * function is invoked and then the multicast lists are loaded. Finally + * the device is moved into the up state and a %NETDEV_UP message is + * sent to the netdev notifier chain. + * + * Calling this function on an active interface is a nop. On a failure + * a negative errno code is returned. + * + * Return: 0 on success, -errno on failure. + */ +int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_open(dev, extack); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_open); + +/** + * dev_close() - shutdown an interface + * @dev: device to shutdown + * + * This function moves an active device into down state. A + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier + * chain. + */ +void dev_close(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_close(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(dev_close); + +int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int ret = -ENODEV; + + if (!ops->ndo_eth_ioctl) + return -EOPNOTSUPP; + + netdev_lock_ops(dev); + if (netif_device_present(dev)) + ret = ops->ndo_eth_ioctl(dev, ifr, cmd); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_eth_ioctl); + +int dev_set_mtu(struct net_device *dev, int new_mtu) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_mtu(dev, new_mtu); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_mtu); + +/** + * dev_disable_lro() - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_disable_lro(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(dev_disable_lro); + +/** + * dev_set_allmulti() - update allmulti count on a device + * @dev: device + * @inc: modifier + * + * Add or remove reception of all multicast frames to a device. While the + * count in the device remains above zero the interface remains listening + * to all interfaces. Once it hits zero the device reverts back to normal + * filtering operation. A negative @inc value is used to drop the counter + * when releasing a resource needing all multicasts. + * + * Return: 0 on success, -errno on failure. + */ + +int dev_set_allmulti(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_allmulti(dev, inc, true); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_allmulti); + +/** + * dev_set_mac_address() - change Media Access Control Address + * @dev: device + * @sa: new address + * @extack: netlink extended ack + * + * Change the hardware (MAC) address of the device + * + * Return: 0 on success, -errno on failure. + */ +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + netdev_lock(dev); + ret = netif_set_mac_address(dev, sa, extack); + netdev_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_mac_address); + +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_xdp_propagate(dev, bpf); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_xdp_propagate); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4c2098ac9d72..296e52d1395d 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -110,7 +110,7 @@ static int dev_getifmap(struct net_device *dev, struct ifreq *ifr) return 0; } -static int dev_setifmap(struct net_device *dev, struct ifreq *ifr) +static int netif_setifmap(struct net_device *dev, struct ifreq *ifr) { struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map; @@ -240,20 +240,6 @@ int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) return 0; } -static int dev_eth_ioctl(struct net_device *dev, - struct ifreq *ifr, unsigned int cmd) -{ - const struct net_device_ops *ops = dev->netdev_ops; - - if (!ops->ndo_eth_ioctl) - return -EOPNOTSUPP; - - if (!netif_device_present(dev)) - return -ENODEV; - - return ops->ndo_eth_ioctl(dev, ifr, cmd); -} - /** * dev_get_hwtstamp_phylib() - Get hardware timestamping settings of NIC * or of attached phylib PHY @@ -305,7 +291,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) return -ENODEV; kernel_cfg.ifr = ifr; + netdev_lock_ops(dev); err = dev_get_hwtstamp_phylib(dev, &kernel_cfg); + netdev_unlock_ops(dev); if (err) return err; @@ -429,7 +417,9 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) if (!netif_device_present(dev)) return -ENODEV; + netdev_lock_ops(dev); err = dev_set_hwtstamp_phylib(dev, &kernel_cfg, &extack); + netdev_unlock_ops(dev); if (err) return err; @@ -504,10 +494,14 @@ static int dev_siocbond(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocbond) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocbond(dev, ifr, cmd); - else - return -ENODEV; + ret = ops->ndo_siocbond(dev, ifr, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -519,10 +513,14 @@ static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocdevprivate) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocdevprivate(dev, ifr, data, cmd); - else - return -ENODEV; + ret = ops->ndo_siocdevprivate(dev, ifr, data, cmd); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -533,10 +531,14 @@ static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs) const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_siocwandev) { + int ret = -ENODEV; + + netdev_lock_ops(dev); if (netif_device_present(dev)) - return ops->ndo_siocwandev(dev, ifs); - else - return -ENODEV; + ret = ops->ndo_siocwandev(dev, ifs); + netdev_unlock_ops(dev); + + return ret; } return -EOPNOTSUPP; @@ -572,7 +574,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCSIFHWADDR: if (dev->addr_len > sizeof(struct sockaddr)) return -EINVAL; - return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL); + return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) @@ -580,11 +582,16 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); + netdev_lock_ops(dev); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + netdev_unlock_ops(dev); return 0; case SIOCSIFMAP: - return dev_setifmap(dev, ifr); + netdev_lock_ops(dev); + err = netif_setifmap(dev, ifr); + netdev_unlock_ops(dev); + return err; case SIOCADDMULTI: if (!ops->ndo_set_rx_mode || @@ -592,7 +599,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCDELMULTI: if (!ops->ndo_set_rx_mode || @@ -600,7 +610,10 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_lock_ops(dev); + err = dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); + netdev_unlock_ops(dev); + return err; case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8d9dc048a548..02d1d40b47ae 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -262,14 +262,11 @@ static ssize_t address_show(struct device *dev, struct device_attribute *attr, struct net_device *ndev = to_net_dev(dev); ssize_t ret = -EINVAL; - down_read(&dev_addr_sem); - - rcu_read_lock(); + netdev_lock(ndev); if (dev_isalive(ndev)) ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len); - rcu_read_unlock(); + netdev_unlock(ndev); - up_read(&dev_addr_sem); return ret; } static DEVICE_ATTR_RO(address); @@ -1483,8 +1480,10 @@ static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr, return err; err = -EOPNOTSUPP; + netdev_lock_ops(dev); if (dev->netdev_ops->ndo_set_tx_maxrate) err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); + netdev_unlock_ops(dev); if (!err) { queue->tx_maxrate = rate; diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index ddd54e1e7289..7419c41fd3cb 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -30,6 +30,8 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) goto err_free_new_mem; } + netdev_lock(dev); + err = qops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx); if (err) goto err_free_old_mem; @@ -52,6 +54,8 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) qops->ndo_queue_mem_free(dev, old_mem); + netdev_unlock(dev); + kvfree(old_mem); kvfree(new_mem); @@ -76,6 +80,7 @@ err_free_new_queue_mem: qops->ndo_queue_mem_free(dev, new_mem); err_free_old_mem: + netdev_unlock(dev); kvfree(old_mem); err_free_new_mem: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b4612d305970..88a352b02bce 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2912,12 +2912,19 @@ static int do_set_master(struct net_device *dev, int ifindex, const struct net_device_ops *ops; int err; + /* Release the lower lock, the upper is responsible for locking + * the lower if needed. None of the existing upper devices + * use netdev instance lock, so don't grab it. + */ + if (upper_dev) { if (upper_dev->ifindex == ifindex) return 0; ops = upper_dev->netdev_ops; if (ops->ndo_del_slave) { + netdev_unlock_ops(dev); err = ops->ndo_del_slave(upper_dev, dev); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2931,7 +2938,9 @@ static int do_set_master(struct net_device *dev, int ifindex, return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { + netdev_unlock_ops(dev); err = ops->ndo_add_slave(upper_dev, dev, extack); + netdev_lock_ops(dev); if (err) return err; } else { @@ -2981,7 +2990,7 @@ static int do_set_proto_down(struct net_device *dev, if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); - dev_change_proto_down_reason(dev, mask, value); + netdev_change_proto_down_reason_locked(dev, mask, value); } if (nl_proto_down) { @@ -2992,8 +3001,7 @@ static int do_set_proto_down(struct net_device *dev, NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); return -EBUSY; } - err = dev_change_proto_down(dev, - proto_down); + err = netif_change_proto_down(dev, proto_down); if (err) return err; } @@ -3013,6 +3021,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, char ifname[IFNAMSIZ]; int err; + netdev_lock_ops(dev); + err = validate_linkmsg(dev, tb, extack); if (err < 0) goto errout; @@ -3028,7 +3038,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); - err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex, extack); + err = netif_change_net_namespace(dev, tgt_net, pat, + new_ifindex, extack); if (err) goto errout; @@ -3078,7 +3089,11 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev_set_mac_address_user(dev, sa, extack); + if (!netdev_need_ops_lock(dev)) + netdev_lock(dev); + err = netif_set_mac_address(dev, sa, extack); + if (!netdev_need_ops_lock(dev)) + netdev_unlock(dev); kfree(sa); if (err) goto errout; @@ -3086,14 +3101,14 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (tb[IFLA_MTU]) { - err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); + err = netif_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_GROUP]) { - dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); status |= DO_SETLINK_NOTIFY; } @@ -3103,15 +3118,15 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, * requested. */ if (ifm->ifi_index > 0 && ifname[0]) { - err = dev_change_name(dev, ifname); + err = netif_change_name(dev, ifname); if (err < 0) goto errout; status |= DO_SETLINK_MODIFIED; } if (tb[IFLA_IFALIAS]) { - err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), - nla_len(tb[IFLA_IFALIAS])); + err = netif_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); if (err < 0) goto errout; status |= DO_SETLINK_NOTIFY; @@ -3123,8 +3138,8 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (ifm->ifi_flags || ifm->ifi_change) { - err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), - extack); + err = netif_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + extack); if (err < 0) goto errout; } @@ -3137,7 +3152,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, } if (tb[IFLA_CARRIER]) { - err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); + err = netif_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -3146,7 +3161,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, if (tb[IFLA_TXQLEN]) { unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]); - err = dev_change_tx_queue_len(dev, value); + err = netif_change_tx_queue_len(dev, value); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -3377,6 +3392,8 @@ errout: dev->name); } + netdev_unlock_ops(dev); + return err; } @@ -3810,6 +3827,8 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, goto out; } + netdev_lock_ops(dev); + err = rtnl_configure_link(dev, ifm, portid, nlh); if (err < 0) goto out_unregister; @@ -3818,9 +3837,12 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, if (err) goto out_unregister; } + + netdev_unlock_ops(dev); out: return err; out_unregister: + netdev_unlock_ops(dev); if (ops->newlink) { LIST_HEAD(list_kill); diff --git a/net/dsa/conduit.c b/net/dsa/conduit.c index 3dfdb3cb47dc..f21bb2551bed 100644 --- a/net/dsa/conduit.c +++ b/net/dsa/conduit.c @@ -26,7 +26,9 @@ static int dsa_conduit_get_regs_len(struct net_device *dev) int len; if (ops->get_regs_len) { + netdev_lock_ops(dev); len = ops->get_regs_len(dev); + netdev_unlock_ops(dev); if (len < 0) return len; ret += len; @@ -57,11 +59,15 @@ static void dsa_conduit_get_regs(struct net_device *dev, int len; if (ops->get_regs_len && ops->get_regs) { + netdev_lock_ops(dev); len = ops->get_regs_len(dev); - if (len < 0) + if (len < 0) { + netdev_unlock_ops(dev); return; + } regs->len = len; ops->get_regs(dev, regs, data); + netdev_unlock_ops(dev); data += regs->len; } @@ -91,8 +97,10 @@ static void dsa_conduit_get_ethtool_stats(struct net_device *dev, int count = 0; if (ops->get_sset_count && ops->get_ethtool_stats) { + netdev_lock_ops(dev); count = ops->get_sset_count(dev, ETH_SS_STATS); ops->get_ethtool_stats(dev, stats, data); + netdev_unlock_ops(dev); } if (ds->ops->get_ethtool_stats) @@ -114,8 +122,10 @@ static void dsa_conduit_get_ethtool_phy_stats(struct net_device *dev, if (count >= 0) phy_ethtool_get_stats(dev->phydev, stats, data); } else if (ops->get_sset_count && ops->get_ethtool_phy_stats) { + netdev_lock_ops(dev); count = ops->get_sset_count(dev, ETH_SS_PHY_STATS); ops->get_ethtool_phy_stats(dev, stats, data); + netdev_unlock_ops(dev); } if (count < 0) @@ -132,11 +142,13 @@ static int dsa_conduit_get_sset_count(struct net_device *dev, int sset) struct dsa_switch *ds = cpu_dp->ds; int count = 0; + netdev_lock_ops(dev); if (sset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) count = phy_ethtool_get_sset_count(dev->phydev); else if (ops->get_sset_count) count = ops->get_sset_count(dev, sset); + netdev_unlock_ops(dev); if (count < 0) count = 0; @@ -163,6 +175,7 @@ static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; + netdev_lock_ops(dev); if (stringset == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats) { mcount = phy_ethtool_get_sset_count(dev->phydev); @@ -176,6 +189,7 @@ static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, mcount = 0; ops->get_strings(dev, stringset, data); } + netdev_unlock_ops(dev); if (ds->ops->get_strings) { ndata = data + mcount * len; diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index f22051f33868..d4a79310b33f 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -72,23 +72,24 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); phydev = ethnl_req_get_phydev(&req_info, tb[ETHTOOL_A_CABLE_TEST_HEADER], info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ops = ethtool_phy_ops; if (!ops || !ops->start_cable_test) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ops->start_cable_test(phydev, info->extack); @@ -97,7 +98,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) if (!ret) ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_NTF); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; @@ -339,23 +341,24 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) goto out_dev_put; rtnl_lock(); + netdev_lock_ops(dev); phydev = ethnl_req_get_phydev(&req_info, tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER], info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ops = ethtool_phy_ops; if (!ops || !ops->start_cable_test_tdr) { ret = -EOPNOTSUPP; - goto out_rtnl; + goto out_unlock; } ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ops->start_cable_test_tdr(phydev, info->extack, &cfg); @@ -365,7 +368,8 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_TDR_NTF); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); out_dev_put: ethnl_parse_header_dev_put(&req_info); diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index 48aef6220f00..946830af3e7c 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -418,8 +418,13 @@ cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb, static int cmis_fw_update_reset(struct net_device *dev) { __u32 reset_data = ETH_RESET_PHY; + int ret; - return dev->ethtool_ops->reset(dev, &reset_data); + netdev_lock_ops(dev); + ret = dev->ethtool_ops->reset(dev, &reset_data); + netdev_unlock_ops(dev); + + return ret; } void diff --git a/net/ethtool/features.c b/net/ethtool/features.c index b6cb101d7f19..ccffd64d5a87 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -234,9 +234,10 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ethnl_features_to_bitmap(old_active, dev->features); ethnl_features_to_bitmap(old_wanted, dev->wanted_features); ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, @@ -286,7 +287,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) out_ops: ethnl_ops_complete(dev); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e95b41f40cac..496a2774100c 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2317,6 +2317,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) */ busy = true; netdev_hold(dev, &dev_tracker, GFP_KERNEL); + netdev_unlock_ops(dev); rtnl_unlock(); if (rc == 0) { @@ -2331,8 +2332,10 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) do { rtnl_lock(); + netdev_lock_ops(dev); rc = ops->set_phys_id(dev, (i++ & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); + netdev_unlock_ops(dev); rtnl_unlock(); if (rc) break; @@ -2341,6 +2344,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) } rtnl_lock(); + netdev_lock_ops(dev); netdev_put(dev, &dev_tracker); busy = false; @@ -3140,6 +3144,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, return -EPERM; } + netdev_lock_ops(dev); if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); @@ -3373,6 +3378,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, out: if (dev->dev.parent) pm_runtime_put(dev->dev.parent); + netdev_unlock_ops(dev); return rc; } diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 6988e07bdcd6..d3d2e135e45e 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -419,19 +419,21 @@ int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = ethnl_module_fw_flash_validate(dev, info->extack); if (ret < 0) - goto out_rtnl; + goto out_unlock; ret = module_flash_fw(dev, tb, skb, info); ethnl_ops_complete(dev); -out_rtnl: +out_unlock: + netdev_unlock_ops(dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index b4c45207fa32..dee36f5cc228 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -90,6 +90,8 @@ int ethnl_ops_begin(struct net_device *dev) if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); + netdev_ops_assert_locked(dev); + if (!netif_device_present(dev) || dev->reg_state >= NETREG_UNREGISTERING) { ret = -ENODEV; @@ -490,7 +492,11 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ethnl_init_reply_data(reply_data, ops, req_info->dev); rtnl_lock(); + if (req_info->dev) + netdev_lock_ops(req_info->dev); ret = ops->prepare_data(req_info, reply_data, info); + if (req_info->dev) + netdev_unlock_ops(req_info->dev); rtnl_unlock(); if (ret < 0) goto err_cleanup; @@ -548,7 +554,9 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); rtnl_lock(); + netdev_lock_ops(ctx->req_info->dev); ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info); + netdev_unlock_ops(ctx->req_info->dev); rtnl_unlock(); if (ret < 0) goto out; @@ -693,6 +701,7 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + netdev_lock_ops(dev); dev->cfg_pending = kmemdup(dev->cfg, sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT); if (!dev->cfg_pending) { @@ -720,6 +729,7 @@ out_free_cfg: kfree(dev->cfg_pending); out_tie_cfg: dev->cfg_pending = dev->cfg; + netdev_unlock_ops(dev); rtnl_unlock(); out_dev: ethnl_parse_header_dev_put(&req_info); @@ -777,6 +787,8 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, req_info->dev = dev; req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS; + netdev_ops_assert_locked(dev); + ethnl_init_reply_data(reply_data, ops, dev); ret = ops->prepare_data(req_info, reply_data, &info); if (ret < 0) diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index ed8f690f6bac..2b428bc80c9b 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -158,18 +158,19 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) return ret; rtnl_lock(); + netdev_lock_ops(req_info.base.dev); ret = ethnl_phy_parse_request(&req_info.base, tb, info->extack); if (ret < 0) - goto err_unlock_rtnl; + goto err_unlock; /* No PHY, return early */ if (!req_info.pdn) - goto err_unlock_rtnl; + goto err_unlock; ret = ethnl_phy_reply_size(&req_info.base, info->extack); if (ret < 0) - goto err_unlock_rtnl; + goto err_unlock; reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.base.dev, @@ -178,13 +179,14 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) info, &reply_payload); if (!rskb) { ret = -ENOMEM; - goto err_unlock_rtnl; + goto err_unlock; } ret = ethnl_phy_fill_reply(&req_info.base, rskb); if (ret) goto err_free_msg; + netdev_unlock_ops(req_info.base.dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info.base); genlmsg_end(rskb, reply_payload); @@ -193,7 +195,8 @@ int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) err_free_msg: nlmsg_free(rskb); -err_unlock_rtnl: +err_unlock: + netdev_unlock_ops(req_info.base.dev); rtnl_unlock(); ethnl_parse_header_dev_put(&req_info.base); return ret; @@ -290,10 +293,15 @@ int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); if (ctx->phy_req_info->base.dev) { - ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb); + dev = ctx->phy_req_info->base.dev; + netdev_lock_ops(dev); + ret = ethnl_phy_dump_one_dev(skb, dev, cb); + netdev_unlock_ops(dev); } else { for_each_netdev_dump(net, dev, ctx->ifindex) { + netdev_lock_ops(dev); ret = ethnl_phy_dump_one_dev(skb, dev, cb); + netdev_unlock_ops(dev); if (ret) break; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 58df9ad02ce8..ec41d1d7eefe 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -345,7 +345,9 @@ int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex) break; + netdev_lock_ops(dev); ret = rss_dump_one_dev(skb, cb, dev); + netdev_unlock_ops(dev); if (ret) break; } diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 691be6c445b3..73b6a89b8731 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -448,12 +448,15 @@ int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); if (ctx->req_info->base.dev) { - ret = ethnl_tsinfo_dump_one_net_topo(skb, - ctx->req_info->base.dev, - cb); + dev = ctx->req_info->base.dev; + netdev_lock_ops(dev); + ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + netdev_unlock_ops(dev); } else { for_each_netdev_dump(net, dev, ctx->pos_ifindex) { + netdev_lock_ops(dev); ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + netdev_unlock_ops(dev); if (ret < 0 && ret != -EOPNOTSUPP) break; ctx->pos_phyindex = 0; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index e06bc36f49fe..0ec4abded10d 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1175,7 +1175,7 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); down_write(&flowtable->flow_block_lock); - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); + err = dev_setup_tc(dev, TC_SETUP_FT, bo); up_write(&flowtable->flow_block_lock); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 64675f1c7f29..b761899c143c 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -390,7 +390,7 @@ static int nft_block_offload_cmd(struct nft_base_chain *chain, nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack); - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + err = dev_setup_tc(dev, TC_SETUP_BLOCK, &bo); if (err < 0) return err; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3e91cf867eb..f5101c2ffc66 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1279,9 +1279,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev, * We replay the request because the device may * go away in the mean time. */ + netdev_unlock_ops(dev); rtnl_unlock(); request_module(NET_SCH_ALIAS_PREFIX "%s", name); rtnl_lock(); + netdev_lock_ops(dev); ops = qdisc_lookup_ops(kind); if (ops != NULL) { /* We will try again qdisc_lookup_ops, @@ -1505,27 +1507,18 @@ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = nlmsg_data(n); - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; - u32 clid; struct Qdisc *q = NULL; struct Qdisc *p = NULL; + u32 clid; int err; - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { @@ -1582,6 +1575,31 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, return 0; } +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(n); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + netdev_lock_ops(dev); + err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm); + netdev_unlock_ops(dev); + + return err; +} + static bool req_create_or_replace(struct nlmsghdr *n) { return (n->nlmsg_flags & NLM_F_CREATE && @@ -1601,35 +1619,19 @@ static bool req_change(struct nlmsghdr *n) !(n->nlmsg_flags & NLM_F_EXCL)); } -/* - * Create/change qdisc. - */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm, + bool *replay) { - struct net *net = sock_net(skb->sk); - struct tcmsg *tcm; - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; + struct Qdisc *q = NULL; + struct Qdisc *p = NULL; u32 clid; - struct Qdisc *q, *p; int err; -replay: - /* Reinit, just in case something touches this. */ - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - tcm = nlmsg_data(n); clid = tcm->tcm_parent; - q = p = NULL; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - if (clid) { if (clid != TC_H_ROOT) { @@ -1755,7 +1757,7 @@ replay: } err = qdisc_change(q, tca, extack); if (err == 0) - qdisc_notify(net, skb, n, clid, NULL, q, extack); + qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack); return err; create_n_graft: @@ -1788,8 +1790,10 @@ create_n_graft2: tca, &err, extack); } if (q == NULL) { - if (err == -EAGAIN) - goto replay; + if (err == -EAGAIN) { + *replay = true; + return 0; + } return err; } @@ -1804,6 +1808,41 @@ graft: return 0; } +/* + * Create/change qdisc. + */ +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + struct tcmsg *tcm; + bool replay; + int err; + +replay: + /* Reinit, just in case something touches this. */ + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + tcm = nlmsg_data(n); + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + replay = false; + netdev_lock_ops(dev); + err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm, &replay); + netdev_unlock_ops(dev); + if (replay) + goto replay; + + return err; +} + static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx, bool recur, @@ -1888,17 +1927,23 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; + netdev_lock_ops(dev); if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), skb, cb, &q_idx, s_q_idx, - true, tca[TCA_DUMP_INVISIBLE]) < 0) + true, tca[TCA_DUMP_INVISIBLE]) < 0) { + netdev_unlock_ops(dev); goto done; + } dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), skb, cb, &q_idx, s_q_idx, false, - tca[TCA_DUMP_INVISIBLE]) < 0) + tca[TCA_DUMP_INVISIBLE]) < 0) { + netdev_unlock_ops(dev); goto done; + } + netdev_unlock_ops(dev); cont: idx++; @@ -2135,15 +2180,15 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, #endif -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, - struct netlink_ext_ack *extack) +static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack, + struct net_device *dev, + struct nlattr *tca[TCA_MAX + 1], + struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); - struct tcmsg *tcm = nlmsg_data(n); - struct nlattr *tca[TCA_MAX + 1]; - struct net_device *dev; - struct Qdisc *q = NULL; const struct Qdisc_class_ops *cops; + struct Qdisc *q = NULL; unsigned long cl = 0; unsigned long new_cl; u32 portid; @@ -2151,15 +2196,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, u32 qid; int err; - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, - rtm_tca_policy, extack); - if (err < 0) - return err; - - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. @@ -2268,6 +2304,31 @@ out: return err; } +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct tcmsg *tcm = nlmsg_data(n); + struct nlattr *tca[TCA_MAX + 1]; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); + if (err < 0) + return err; + + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + netdev_lock_ops(dev); + err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm); + netdev_unlock_ops(dev); + + return err; +} + struct qdisc_dump_args { struct qdisc_walker w; struct sk_buff *skb; @@ -2344,20 +2405,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, return 0; } -static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb, + struct tcmsg *tcm, struct net_device *dev) { - struct tcmsg *tcm = nlmsg_data(cb->nlh); - struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; - struct net_device *dev; int t, s_t; - if (nlmsg_len(cb->nlh) < sizeof(*tcm)) - return 0; - dev = dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return 0; - s_t = cb->args[0]; t = 0; @@ -2374,10 +2427,32 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) done: cb->args[0] = t; - dev_put(dev); return skb->len; } +static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int err; + + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) + return 0; + + dev = dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return 0; + + netdev_lock_ops(dev); + err = __tc_dump_tclass(skb, cb, tcm, dev); + netdev_unlock_ops(dev); + + dev_put(dev); + + return err; +} + #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 84bf9f1d4bf2..f864e5d70b40 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1181,6 +1181,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_release; } + netdev_lock_ops(dev); + if (!xs->rx && !xs->tx) { err = -EINVAL; goto out_unlock; @@ -1315,6 +1317,7 @@ out_unlock: smp_wmb(); WRITE_ONCE(xs->state, XSK_BOUND); } + netdev_unlock_ops(dev); out_release: mutex_unlock(&xs->mutex); rtnl_unlock(); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index c263fb7a68dc..0e6ca568fdee 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/netdevice.h> #include <net/xsk_buff_pool.h> #include <net/xdp_sock.h> #include <net/xdp_sock_drv.h> @@ -219,6 +220,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool, bpf.xsk.pool = pool; bpf.xsk.queue_id = queue_id; + netdev_ops_assert_locked(netdev); err = netdev->netdev_ops->ndo_bpf(netdev, &bpf); if (err) goto err_unreg_pool; |