diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 142 | 
1 files changed, 102 insertions, 40 deletions
| diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 92282f98dc82..df1166b76126 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -701,9 +701,21 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)  	rcu_read_lock();  	hash_location = tcp_parse_md5sig_option(th);  	if (sk && sk_fullsock(sk)) { -		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) -					&ip_hdr(skb)->saddr, AF_INET); +		const union tcp_md5_addr *addr; +		int l3index; + +		/* sdif set, means packet ingressed via a device +		 * in an L3 domain and inet_iif is set to it. +		 */ +		l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; +		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; +		key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);  	} else if (hash_location) { +		const union tcp_md5_addr *addr; +		int sdif = tcp_v4_sdif(skb); +		int dif = inet_iif(skb); +		int l3index; +  		/*  		 * active side is lost. Try to find listening socket through  		 * source port, and then find md5 key through listening socket. @@ -714,14 +726,17 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)  		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,  					     ip_hdr(skb)->saddr,  					     th->source, ip_hdr(skb)->daddr, -					     ntohs(th->source), inet_iif(skb), -					     tcp_v4_sdif(skb)); +					     ntohs(th->source), dif, sdif);  		/* don't send rst if it can't find key */  		if (!sk1)  			goto out; -		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) -					&ip_hdr(skb)->saddr, AF_INET); +		/* sdif set, means packet ingressed via a device +		 * in an L3 domain and dif is set to it. +		 */ +		l3index = sdif ? dif : 0; +		addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; +		key = tcp_md5_do_lookup(sk1, l3index, addr, AF_INET);  		if (!key)  			goto out; @@ -905,6 +920,9 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)  static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,  				  struct request_sock *req)  { +	const union tcp_md5_addr *addr; +	int l3index; +  	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV  	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.  	 */ @@ -916,14 +934,15 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,  	 * exception of <SYN> segments, MUST be right-shifted by  	 * Rcv.Wind.Shift bits:  	 */ +	addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; +	l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;  	tcp_v4_send_ack(sk, skb, seq,  			tcp_rsk(req)->rcv_nxt,  			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,  			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,  			req->ts_recent,  			0, -			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr, -					  AF_INET), +			tcp_md5_do_lookup(sk, l3index, addr, AF_INET),  			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,  			ip_hdr(skb)->tos);  } @@ -983,7 +1002,7 @@ DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);  EXPORT_SYMBOL(tcp_md5_needed);  /* Find the Key structure for an address.  */ -struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, +struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,  					   const union tcp_md5_addr *addr,  					   int family)  { @@ -1003,7 +1022,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,  	hlist_for_each_entry_rcu(key, &md5sig->head, node) {  		if (key->family != family)  			continue; - +		if (key->l3index && key->l3index != l3index) +			continue;  		if (family == AF_INET) {  			mask = inet_make_mask(key->prefixlen);  			match = (key->addr.a4.s_addr & mask) == @@ -1027,7 +1047,8 @@ EXPORT_SYMBOL(__tcp_md5_do_lookup);  static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,  						      const union tcp_md5_addr *addr, -						      int family, u8 prefixlen) +						      int family, u8 prefixlen, +						      int l3index)  {  	const struct tcp_sock *tp = tcp_sk(sk);  	struct tcp_md5sig_key *key; @@ -1046,6 +1067,8 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,  	hlist_for_each_entry_rcu(key, &md5sig->head, node) {  		if (key->family != family)  			continue; +		if (key->l3index && key->l3index != l3index) +			continue;  		if (!memcmp(&key->addr, addr, size) &&  		    key->prefixlen == prefixlen)  			return key; @@ -1057,23 +1080,26 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,  					 const struct sock *addr_sk)  {  	const union tcp_md5_addr *addr; +	int l3index; +	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), +						 addr_sk->sk_bound_dev_if);  	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; -	return tcp_md5_do_lookup(sk, addr, AF_INET); +	return tcp_md5_do_lookup(sk, l3index, addr, AF_INET);  }  EXPORT_SYMBOL(tcp_v4_md5_lookup);  /* This can be called on a newly created socket, from other files */  int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, -		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, -		   gfp_t gfp) +		   int family, u8 prefixlen, int l3index, +		   const u8 *newkey, u8 newkeylen, gfp_t gfp)  {  	/* Add Key to the list */  	struct tcp_md5sig_key *key;  	struct tcp_sock *tp = tcp_sk(sk);  	struct tcp_md5sig_info *md5sig; -	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); +	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);  	if (key) {  		/* Pre-existing entry - just update that one. */  		memcpy(key->key, newkey, newkeylen); @@ -1105,6 +1131,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,  	key->keylen = newkeylen;  	key->family = family;  	key->prefixlen = prefixlen; +	key->l3index = l3index;  	memcpy(&key->addr, addr,  	       (family == AF_INET6) ? sizeof(struct in6_addr) :  				      sizeof(struct in_addr)); @@ -1114,11 +1141,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,  EXPORT_SYMBOL(tcp_md5_do_add);  int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, -		   u8 prefixlen) +		   u8 prefixlen, int l3index)  {  	struct tcp_md5sig_key *key; -	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); +	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);  	if (!key)  		return -ENOENT;  	hlist_del_rcu(&key->node); @@ -1149,7 +1176,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,  {  	struct tcp_md5sig cmd;  	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; +	const union tcp_md5_addr *addr;  	u8 prefixlen = 32; +	int l3index = 0;  	if (optlen < sizeof(cmd))  		return -EINVAL; @@ -1167,16 +1196,34 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,  			return -EINVAL;  	} +	if (optname == TCP_MD5SIG_EXT && +	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { +		struct net_device *dev; + +		rcu_read_lock(); +		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); +		if (dev && netif_is_l3_master(dev)) +			l3index = dev->ifindex; + +		rcu_read_unlock(); + +		/* ok to reference set/not set outside of rcu; +		 * right now device MUST be an L3 master +		 */ +		if (!dev || !l3index) +			return -EINVAL; +	} + +	addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr; +  	if (!cmd.tcpm_keylen) -		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, -				      AF_INET, prefixlen); +		return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index);  	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)  		return -EINVAL; -	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, -			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen, -			      GFP_KERNEL); +	return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, +			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);  }  static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, @@ -1286,7 +1333,8 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb);  /* Called with rcu_read_lock() */  static bool tcp_v4_inbound_md5_hash(const struct sock *sk, -				    const struct sk_buff *skb) +				    const struct sk_buff *skb, +				    int dif, int sdif)  {  #ifdef CONFIG_TCP_MD5SIG  	/* @@ -1301,11 +1349,17 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,  	struct tcp_md5sig_key *hash_expected;  	const struct iphdr *iph = ip_hdr(skb);  	const struct tcphdr *th = tcp_hdr(skb); -	int genhash; +	const union tcp_md5_addr *addr;  	unsigned char newhash[16]; +	int genhash, l3index; + +	/* sdif set, means packet ingressed via a device +	 * in an L3 domain and dif is set to the l3mdev +	 */ +	l3index = sdif ? dif : 0; -	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, -					  AF_INET); +	addr = (union tcp_md5_addr *)&iph->saddr; +	hash_expected = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);  	hash_location = tcp_parse_md5sig_option(th);  	/* We've parsed the options - do we have a hash? */ @@ -1331,11 +1385,11 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,  	if (genhash || memcmp(hash_location, newhash, 16) != 0) {  		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); -		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", +		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",  				     &iph->saddr, ntohs(th->source),  				     &iph->daddr, ntohs(th->dest),  				     genhash ? " tcp_v4_calc_md5_hash failed" -				     : ""); +				     : "", l3index);  		return true;  	}  	return false; @@ -1372,7 +1426,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {  	.syn_ack_timeout =	tcp_syn_ack_timeout,  }; -static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {  	.mss_clamp	=	TCP_MSS_DEFAULT,  #ifdef CONFIG_TCP_MD5SIG  	.req_md5_lookup	=	tcp_v4_md5_lookup, @@ -1419,7 +1473,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,  	struct tcp_sock *newtp;  	struct sock *newsk;  #ifdef CONFIG_TCP_MD5SIG +	const union tcp_md5_addr *addr;  	struct tcp_md5sig_key *key; +	int l3index;  #endif  	struct ip_options_rcu *inet_opt; @@ -1467,9 +1523,10 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,  	tcp_initialize_rcv_mss(newsk);  #ifdef CONFIG_TCP_MD5SIG +	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);  	/* Copy over the MD5 key from the original socket */ -	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, -				AF_INET); +	addr = (union tcp_md5_addr *)&newinet->inet_daddr; +	key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);  	if (key) {  		/*  		 * We're using one, so create a matching key @@ -1477,8 +1534,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,  		 * memory, then we end up not copying the key  		 * across. Shucks.  		 */ -		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, -			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC); +		tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, +			       key->key, key->keylen, GFP_ATOMIC);  		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);  	}  #endif @@ -1808,6 +1865,7 @@ int tcp_v4_rcv(struct sk_buff *skb)  	struct net *net = dev_net(skb->dev);  	struct sk_buff *skb_to_free;  	int sdif = inet_sdif(skb); +	int dif = inet_iif(skb);  	const struct iphdr *iph;  	const struct tcphdr *th;  	bool refcounted; @@ -1856,7 +1914,7 @@ process:  		struct sock *nsk;  		sk = req->rsk_listener; -		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { +		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {  			sk_drops_add(sk, skb);  			reqsk_put(req);  			goto discard_it; @@ -1914,7 +1972,7 @@ process:  	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))  		goto discard_and_relse; -	if (tcp_v4_inbound_md5_hash(sk, skb)) +	if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))  		goto discard_and_relse;  	nf_reset_ct(skb); @@ -2147,13 +2205,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur)  	struct tcp_iter_state *st = seq->private;  	struct net *net = seq_file_net(seq);  	struct inet_listen_hashbucket *ilb; +	struct hlist_nulls_node *node;  	struct sock *sk = cur;  	if (!sk) {  get_head:  		ilb = &tcp_hashinfo.listening_hash[st->bucket];  		spin_lock(&ilb->lock); -		sk = sk_head(&ilb->head); +		sk = sk_nulls_head(&ilb->nulls_head);  		st->offset = 0;  		goto get_sk;  	} @@ -2161,9 +2220,9 @@ get_head:  	++st->num;  	++st->offset; -	sk = sk_next(sk); +	sk = sk_nulls_next(sk);  get_sk: -	sk_for_each_from(sk) { +	sk_nulls_for_each_from(sk, node) {  		if (!net_eq(sock_net(sk), net))  			continue;  		if (sk->sk_family == afinfo->family) @@ -2619,7 +2678,8 @@ static void __net_exit tcp_sk_exit(struct net *net)  	int cpu;  	if (net->ipv4.tcp_congestion_control) -		module_put(net->ipv4.tcp_congestion_control->owner); +		bpf_module_put(net->ipv4.tcp_congestion_control, +			       net->ipv4.tcp_congestion_control->owner);  	for_each_possible_cpu(cpu)  		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); @@ -2674,6 +2734,7 @@ static int __net_init tcp_sk_init(struct net *net)  	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;  	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;  	net->ipv4.sysctl_tcp_tw_reuse = 2; +	net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;  	cnt = tcp_hashinfo.ehash_mask + 1;  	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2; @@ -2725,7 +2786,8 @@ static int __net_init tcp_sk_init(struct net *net)  	/* Reno is always built in */  	if (!net_eq(net, &init_net) && -	    try_module_get(init_net.ipv4.tcp_congestion_control->owner)) +	    bpf_try_module_get(init_net.ipv4.tcp_congestion_control, +			       init_net.ipv4.tcp_congestion_control->owner))  		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;  	else  		net->ipv4.tcp_congestion_control = &tcp_reno; | 
