Diffstat (limited to 'net/core')
-rw-r--r--	net/core/devlink.c	70
-rw-r--r--	net/core/gro_cells.c	7
-rw-r--r--	net/core/neighbour.c	2
-rw-r--r--	net/core/netpoll.c	22
-rw-r--r--	net/core/skbuff.c	2
-rw-r--r--	net/core/skmsg.c	87
6 files changed, 150 insertions(+), 40 deletions(-)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a932d95be798..8c5ddffd707d 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -517,7 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
 	return test_bit(limit, &devlink->ops->reload_limits);
 }
 
-static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
+static int devlink_reload_stat_put(struct sk_buff *msg,
 				   enum devlink_reload_limit limit, u32 value)
 {
 	struct nlattr *reload_stats_entry;
@@ -526,8 +526,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti
 	if (!reload_stats_entry)
 		return -EMSGSIZE;
 
-	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) ||
-	    nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
 	    nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
 		goto nla_put_failure;
 	nla_nest_end(msg, reload_stats_entry);
@@ -540,7 +539,7 @@ nla_put_failure:
 
 static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
 {
-	struct nlattr *reload_stats_attr;
+	struct nlattr *reload_stats_attr, *act_info, *act_stats;
 	int i, j, stat_idx;
 	u32 value;
 
@@ -552,17 +551,29 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
 	if (!reload_stats_attr)
 		return -EMSGSIZE;
 
-	for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
-		/* Remote stats are shown even if not locally supported. Stats
-		 * of actions with unspecified limit are shown though drivers
-		 * don't need to register unspecified limit.
-		 */
-		if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
-		    !devlink_reload_limit_is_supported(devlink, j))
+	for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+		if ((!is_remote &&
+		     !devlink_reload_action_is_supported(devlink, i)) ||
+		    i == DEVLINK_RELOAD_ACTION_UNSPEC)
 			continue;
-		for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
-			if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
-			    i == DEVLINK_RELOAD_ACTION_UNSPEC ||
+		act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+		if (!act_info)
+			goto nla_put_failure;
+
+		if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+			goto action_info_nest_cancel;
+		act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+		if (!act_stats)
+			goto action_info_nest_cancel;
+
+		for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+			/* Remote stats are shown even if not locally supported.
+			 * Stats of actions with unspecified limit are shown
+			 * though drivers don't need to register unspecified
+			 * limit.
+			 */
+			if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+			     !devlink_reload_limit_is_supported(devlink, j)) ||
 			    devlink_reload_combination_is_invalid(i, j))
 				continue;
 
@@ -571,13 +582,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
 				value = devlink->stats.reload_stats[stat_idx];
 			else
 				value = devlink->stats.remote_reload_stats[stat_idx];
-			if (devlink_reload_stat_put(msg, i, j, value))
-				goto nla_put_failure;
+			if (devlink_reload_stat_put(msg, j, value))
+				goto action_stats_nest_cancel;
 		}
+		nla_nest_end(msg, act_stats);
+		nla_nest_end(msg, act_info);
 	}
 	nla_nest_end(msg, reload_stats_attr);
 	return 0;
 
+action_stats_nest_cancel:
+	nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+	nla_nest_cancel(msg, act_info);
 nla_put_failure:
 	nla_nest_cancel(msg, reload_stats_attr);
 	return -EMSGSIZE;
@@ -755,6 +772,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
 		goto nla_put_failure;
 
+	/* Hold rtnl lock while accessing port's netdev attributes. */
+	rtnl_lock();
 	spin_lock_bh(&devlink_port->type_lock);
 	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
 		goto nla_put_failure_type_locked;
@@ -763,9 +782,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 			devlink_port->desired_type))
 		goto nla_put_failure_type_locked;
 	if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+		struct net *net = devlink_net(devlink_port->devlink);
 		struct net_device *netdev = devlink_port->type_dev;
 
-		if (netdev &&
+		if (netdev && net_eq(net, dev_net(netdev)) &&
 		    (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
 				 netdev->ifindex) ||
 		     nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
@@ -781,6 +801,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 			goto nla_put_failure_type_locked;
 	}
 	spin_unlock_bh(&devlink_port->type_lock);
+	rtnl_unlock();
 	if (devlink_nl_port_attrs_put(msg, devlink_port))
 		goto nla_put_failure;
 	if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
@@ -791,6 +812,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 
 nla_put_failure_type_locked:
 	spin_unlock_bh(&devlink_port->type_lock);
+	rtnl_unlock();
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
@@ -1448,7 +1470,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
 		err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
 						pool_index, &cur, &max);
 		if (err && err != -EOPNOTSUPP)
-			return err;
+			goto sb_occ_get_failure;
 		if (!err) {
 			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
 				goto nla_put_failure;
@@ -1461,8 +1483,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
 	return 0;
 
 nla_put_failure:
+	err = -EMSGSIZE;
+sb_occ_get_failure:
 	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
+	return err;
 }
 
 static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
@@ -8254,8 +8278,6 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 
-	if (WARN_ON(devlink_port->registered))
-		return -EEXIST;
 	devlink_port->attrs_set = true;
 	attrs->flavour = flavour;
 	if (attrs->switch_id.id_len) {
@@ -8279,6 +8301,8 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port,
 {
 	int ret;
 
+	if (WARN_ON(devlink_port->registered))
+		return;
 	devlink_port->attrs = *attrs;
 	ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
 	if (ret)
@@ -8301,6 +8325,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 	int ret;
 
+	if (WARN_ON(devlink_port->registered))
+		return;
 	ret = __devlink_port_attrs_set(devlink_port,
 				       DEVLINK_PORT_FLAVOUR_PCI_PF);
 	if (ret)
@@ -8326,6 +8352,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 	int ret;
 
+	if (WARN_ON(devlink_port->registered))
+		return;
 	ret = __devlink_port_attrs_set(devlink_port,
 				       DEVLINK_PORT_FLAVOUR_PCI_VF);
 	if (ret)
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index e095fb871d91..6eb2e5ec2c50 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -99,9 +99,14 @@ void gro_cells_destroy(struct gro_cells *gcells)
 		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
 
 		napi_disable(&cell->napi);
-		netif_napi_del(&cell->napi);
+		__netif_napi_del(&cell->napi);
 		__skb_queue_purge(&cell->napi_skbs);
 	}
+	/* This barrier is needed because netpoll could access dev->napi_list
+	 * under rcu protection.
+	 */
+	synchronize_net();
+
 	free_percpu(gcells->cells);
 	gcells->cells = NULL;
 }
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e39e28b0a8d..9500d28a43b0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -235,6 +235,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 
 			write_lock(&n->lock);
 			if ((n->nud_state == NUD_FAILED) ||
+			    (tbl->is_multicast &&
+			     tbl->is_multicast(n->primary_key)) ||
 			    time_after(tref, n->updated))
 				remove = true;
 			write_unlock(&n->lock);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c310c7c1cef7..960948290001 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/if_vlan.h>
+#include <net/dsa.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <net/addrconf.h>
@@ -657,15 +658,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
 
 int netpoll_setup(struct netpoll *np)
 {
-	struct net_device *ndev = NULL;
+	struct net_device *ndev = NULL, *dev = NULL;
+	struct net *net = current->nsproxy->net_ns;
 	struct in_device *in_dev;
 	int err;
 
 	rtnl_lock();
-	if (np->dev_name[0]) {
-		struct net *net = current->nsproxy->net_ns;
+	if (np->dev_name[0])
 		ndev = __dev_get_by_name(net, np->dev_name);
-	}
+
 	if (!ndev) {
 		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
 		err = -ENODEV;
@@ -673,6 +674,19 @@ int netpoll_setup(struct netpoll *np)
 	}
 	dev_hold(ndev);
 
+	/* bring up DSA management network devices up first */
+	for_each_netdev(net, dev) {
+		if (!netdev_uses_dsa(dev))
+			continue;
+
+		err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
+		if (err < 0) {
+			np_err(np, "%s failed to open %s\n",
+			       np->dev_name, dev->name);
+			goto put;
+		}
+	}
+
 	if (netdev_master_upper_dev_get(ndev)) {
 		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
 		err = -EBUSY;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1ba8f0163744..06c526e0d810 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4549,7 +4549,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
 	if (skb && (skb_next = skb_peek(q))) {
 		icmp_next = is_icmp_err_skb(skb_next);
 		if (icmp_next)
-			sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+			sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
 	}
 	spin_unlock_irqrestore(&q->lock, flags);
 
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 654182ecf87b..25cdbb20f3a0 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
 	struct scatterlist *sge = sk_msg_elem(msg, i);
 	u32 len = sge->length;
 
-	if (charge)
-		sk_mem_uncharge(sk, len);
-	if (!msg->skb)
+	/* When the skb owns the memory we free it from consume_skb path. */
+	if (!msg->skb) {
+		if (charge)
+			sk_mem_uncharge(sk, len);
 		put_page(sg_page(sge));
+	}
 	memset(sge, 0, sizeof(*sge));
 	return len;
 }
@@ -397,28 +399,45 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+						  struct sk_buff *skb)
 {
-	struct sock *sk = psock->sk;
-	int copied = 0, num_sge;
 	struct sk_msg *msg;
 
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+		return NULL;
+
+	if (!sk_rmem_schedule(sk, skb, skb->truesize))
+		return NULL;
+
 	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	if (unlikely(!msg))
-		return -EAGAIN;
-	if (!sk_rmem_schedule(sk, skb, skb->len)) {
-		kfree(msg);
-		return -EAGAIN;
-	}
+		return NULL;
 
 	sk_msg_init(msg);
+	return msg;
+}
+
+static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					struct sk_psock *psock,
+					struct sock *sk,
+					struct sk_msg *msg)
+{
+	int num_sge, copied;
+
+	/* skb linearize may fail with ENOMEM, but lets simply try again
+	 * later if this happens. Under memory pressure we don't want to
+	 * drop the skb. We need to linearize the skb so that the mapping
+	 * in skb_to_sgvec can not error.
+	 */
+	if (skb_linearize(skb))
+		return -EAGAIN;
 	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
 	if (unlikely(num_sge < 0)) {
 		kfree(msg);
 		return num_sge;
 	}
 
-	sk_mem_charge(sk, skb->len);
 	copied = skb->len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
@@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	return copied;
 }
 
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sk;
+	struct sk_msg *msg;
+
+	/* If we are receiving on the same sock skb->sk is already assigned,
+	 * skip memory accounting and owner transition seeing it already set
+	 * correctly.
+	 */
+	if (unlikely(skb->sk == sk))
+		return sk_psock_skb_ingress_self(psock, skb);
+	msg = sk_psock_create_ingress_msg(sk, skb);
+	if (!msg)
+		return -EAGAIN;
+
+	/* This will transition ownership of the data from the socket where
+	 * the BPF program was run initiating the redirect to the socket
+	 * we will eventually receive this data on. The data will be released
+	 * from skb_consume found in __tcp_bpf_recvmsg() after its been copied
+	 * into user buffers.
+	 */
+	skb_set_owner_r(skb, sk);
+	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
+/* Puts an skb on the ingress queue of the socket already assigned to the
+ * skb. In this case we do not need to check memory limits or skb_set_owner_r
+ * because the skb is already accounted for here.
+ */
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	struct sock *sk = psock->sk;
+
+	if (unlikely(!msg))
+		return -EAGAIN;
+	sk_msg_init(msg);
+	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			       u32 off, u32 len, bool ingress)
 {
@@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 		 * retrying later from workqueue.
 		 */
 		if (skb_queue_empty(&psock->ingress_skb)) {
-			err = sk_psock_skb_ingress(psock, skb);
+			err = sk_psock_skb_ingress_self(psock, skb);
 		}
 		if (err < 0) {
 			skb_queue_tail(&psock->ingress_skb, skb);
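The devlink.c change above reworks the DEVLINK_ATTR_RELOAD_STATS message from a flat list of entries into one nest per reload action. As a rough illustration of what a userspace consumer now sees, here is a minimal sketch that walks the nested layout with libmnl; it assumes a uapi <linux/devlink.h> new enough to define the reload-stats attributes, and walk_reload_stats() is a hypothetical helper (not part of any existing tool) that would be handed the DEVLINK_ATTR_RELOAD_STATS attribute of a devlink dev GET reply.

/* Minimal sketch, not from this commit: walking the reworked
 * DEVLINK_ATTR_RELOAD_STATS layout with libmnl. walk_reload_stats()
 * and its printf output are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>
#include <libmnl/libmnl.h>
#include <linux/devlink.h>

static void walk_reload_stats(const struct nlattr *stats)
{
	const struct nlattr *act_info;

	/* One DEVLINK_ATTR_RELOAD_ACTION_INFO nest per reload action. */
	mnl_attr_for_each_nested(act_info, stats) {
		const struct nlattr *attr;
		uint8_t action = 0;

		mnl_attr_for_each_nested(attr, act_info) {
			if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION) {
				/* The action is now put once per nest ... */
				action = mnl_attr_get_u8(attr);
			} else if (mnl_attr_get_type(attr) ==
				   DEVLINK_ATTR_RELOAD_ACTION_STATS) {
				/* ... and the per-limit entries hang below it. */
				const struct nlattr *entry;

				mnl_attr_for_each_nested(entry, attr) {
					const struct nlattr *f;
					uint8_t limit = 0;
					uint32_t value = 0;

					mnl_attr_for_each_nested(f, entry) {
						switch (mnl_attr_get_type(f)) {
						case DEVLINK_ATTR_RELOAD_STATS_LIMIT:
							limit = mnl_attr_get_u8(f);
							break;
						case DEVLINK_ATTR_RELOAD_STATS_VALUE:
							value = mnl_attr_get_u32(f);
							break;
						}
					}
					printf("action %u limit %u: %u\n",
					       action, limit, value);
				}
			}
		}
	}
}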

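The neighbour.c hunk makes forced GC consult a new is_multicast() hook on struct neigh_table, so multicast entries can be reclaimed ahead of the usual NUD_FAILED/aging checks. A hedged sketch of how an address family could implement the hook, modeled on what IPv4 ARP would need (the companion arp_tbl change lives outside net/core and is not part of this diff):

/* Hedged sketch, not part of this diff: a neigh_table implementation
 * of the is_multicast() hook consulted by neigh_forced_gc() above.
 * Modeled on IPv4 ARP, where the primary key is a __be32 address; the
 * .is_multicast wiring below is an assumption about the companion
 * net/ipv4/arp.c change, not a copy of it.
 */
#include <linux/in.h>
#include <net/neighbour.h>

static bool arp_is_multicast(const void *pkey)
{
	/* Multicast IPv4 addresses never resolve via unicast ARP, so
	 * keeping neighbour entries for them only wastes table space.
	 */
	return ipv4_is_multicast(*((__be32 *)pkey));
}

/* Wired up in the table definition, e.g.:
 *
 *	struct neigh_table arp_tbl = {
 *		.family		= AF_INET,
 *		...
 *		.is_multicast	= arp_is_multicast,
 *	};
 */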