diff options
Diffstat (limited to 'net/core/skmsg.c')
| -rw-r--r-- | net/core/skmsg.c | 87 | 
1 files changed, 74 insertions, 13 deletions
| diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 654182ecf87b..25cdbb20f3a0 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,  	struct scatterlist *sge = sk_msg_elem(msg, i);  	u32 len = sge->length; -	if (charge) -		sk_mem_uncharge(sk, len); -	if (!msg->skb) +	/* When the skb owns the memory we free it from consume_skb path. */ +	if (!msg->skb) { +		if (charge) +			sk_mem_uncharge(sk, len);  		put_page(sg_page(sge)); +	}  	memset(sge, 0, sizeof(*sge));  	return len;  } @@ -397,28 +399,45 @@ out:  }  EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); -static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, +						  struct sk_buff *skb)  { -	struct sock *sk = psock->sk; -	int copied = 0, num_sge;  	struct sk_msg *msg; +	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) +		return NULL; + +	if (!sk_rmem_schedule(sk, skb, skb->truesize)) +		return NULL; +  	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);  	if (unlikely(!msg)) -		return -EAGAIN; -	if (!sk_rmem_schedule(sk, skb, skb->len)) { -		kfree(msg); -		return -EAGAIN; -	} +		return NULL;  	sk_msg_init(msg); +	return msg; +} + +static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, +					struct sk_psock *psock, +					struct sock *sk, +					struct sk_msg *msg) +{ +	int num_sge, copied; + +	/* skb linearize may fail with ENOMEM, but lets simply try again +	 * later if this happens. Under memory pressure we don't want to +	 * drop the skb. We need to linearize the skb so that the mapping +	 * in skb_to_sgvec can not error. +	 */ +	if (skb_linearize(skb)) +		return -EAGAIN;  	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);  	if (unlikely(num_sge < 0)) {  		kfree(msg);  		return num_sge;  	} -	sk_mem_charge(sk, skb->len);  	copied = skb->len;  	msg->sg.start = 0;  	msg->sg.size = copied; @@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)  	return copied;  } +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb); + +static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +{ +	struct sock *sk = psock->sk; +	struct sk_msg *msg; + +	/* If we are receiving on the same sock skb->sk is already assigned, +	 * skip memory accounting and owner transition seeing it already set +	 * correctly. +	 */ +	if (unlikely(skb->sk == sk)) +		return sk_psock_skb_ingress_self(psock, skb); +	msg = sk_psock_create_ingress_msg(sk, skb); +	if (!msg) +		return -EAGAIN; + +	/* This will transition ownership of the data from the socket where +	 * the BPF program was run initiating the redirect to the socket +	 * we will eventually receive this data on. The data will be released +	 * from skb_consume found in __tcp_bpf_recvmsg() after its been copied +	 * into user buffers. +	 */ +	skb_set_owner_r(skb, sk); +	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); +} + +/* Puts an skb on the ingress queue of the socket already assigned to the + * skb. In this case we do not need to check memory limits or skb_set_owner_r + * because the skb is already accounted for here. + */ +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb) +{ +	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); +	struct sock *sk = psock->sk; + +	if (unlikely(!msg)) +		return -EAGAIN; +	sk_msg_init(msg); +	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); +} +  static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,  			       u32 off, u32 len, bool ingress)  { @@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,  		 * retrying later from workqueue.  		 */  		if (skb_queue_empty(&psock->ingress_skb)) { -			err = sk_psock_skb_ingress(psock, skb); +			err = sk_psock_skb_ingress_self(psock, skb);  		}  		if (err < 0) {  			skb_queue_tail(&psock->ingress_skb, skb); | 
