Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--  net/ipv4/route.c | 61
1 file changed, 39 insertions, 22 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f33ad1f383b6..98c6f3429593 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -84,6 +84,7 @@
 #include <linux/jhash.h>
 #include <net/dst.h>
 #include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
 #include <net/net_namespace.h>
 #include <net/ip.h>
 #include <net/route.h>
@@ -112,14 +113,13 @@
 #define DEFAULT_MIN_PMTU (512 + 20 + 20)
 #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
-
+#define DEFAULT_MIN_ADVMSS 256
 static int ip_rt_max_size;
 static int ip_rt_redirect_number __read_mostly	= 9;
 static int ip_rt_redirect_load __read_mostly	= HZ / 50;
 static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
 static int ip_rt_error_cost __read_mostly	= HZ;
 static int ip_rt_error_burst __read_mostly	= 5 * HZ;
-static int ip_rt_min_advmss __read_mostly	= 256;
 static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
@@ -458,7 +458,7 @@ static u32 *ip_tstamps __read_mostly;
  * if one generator is seldom used. This makes hard for an attacker
  * to infer how many packets were sent between two points in time.
  */
-u32 ip_idents_reserve(u32 hash, int segs)
+static u32 ip_idents_reserve(u32 hash, int segs)
 {
 	u32 bucket, old, now = (u32)jiffies;
 	atomic_t *p_id;
@@ -479,7 +479,6 @@ u32 ip_idents_reserve(u32 hash, int segs)
 	 */
 	return atomic_add_return(segs + delta, p_id) - segs;
 }
-EXPORT_SYMBOL(ip_idents_reserve);
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 {
@@ -499,6 +498,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 }
 EXPORT_SYMBOL(__ip_select_ident);
+static void ip_rt_fix_tos(struct flowi4 *fl4)
+{
+	__u8 tos = RT_FL_TOS(fl4);
+
+	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+	fl4->flowi4_scope = tos & RTO_ONLINK ?
+			    RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+}
+
 static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
 			     const struct sock *sk,
 			     const struct iphdr *iph,
@@ -824,6 +832,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
 	rt = (struct rtable *) dst;
 	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
+	ip_rt_fix_tos(&fl4);
 	__ip_do_redirect(rt, skb, &fl4, true);
 }
@@ -1048,6 +1057,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 	struct flowi4 fl4;
 	ip_rt_build_flow_key(&fl4, sk, skb);
+	ip_rt_fix_tos(&fl4);
 	/* Don't make lookup fail for bridged encapsulations */
 	if (skb && netif_is_any_bridge_port(skb->dev))
@@ -1122,6 +1132,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 			goto out;
 		new = true;
+	} else {
+		ip_rt_fix_tos(&fl4);
 	}
 	__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
@@ -1298,9 +1310,10 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 {
+	struct net *net = dev_net(dst->dev);
 	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
 	unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
-				    ip_rt_min_advmss);
+				    net->ipv4.ip_rt_min_advmss);
 	return min(advmss, IPV4_MAX_PMTU - header_size);
 }
@@ -1485,6 +1498,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
 struct uncached_list {
 	spinlock_t		lock;
 	struct list_head	head;
+	struct list_head	quarantine;
 };
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
@@ -1506,7 +1520,7 @@ void rt_del_uncached_list(struct rtable *rt)
 		struct uncached_list *ul = rt->rt_uncached_list;
 		spin_lock_bh(&ul->lock);
-		list_del(&rt->rt_uncached);
+		list_del_init(&rt->rt_uncached);
 		spin_unlock_bh(&ul->lock);
 	}
 }
@@ -1521,20 +1535,24 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 void rt_flush_dev(struct net_device *dev)
 {
-	struct rtable *rt;
+	struct rtable *rt, *safe;
 	int cpu;
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
+		if (list_empty(&ul->head))
+			continue;
+
 		spin_lock_bh(&ul->lock);
-		list_for_each_entry(rt, &ul->head, rt_uncached) {
+		list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) {
 			if (rt->dst.dev != dev)
 				continue;
 			rt->dst.dev = blackhole_netdev;
 			dev_replace_track(dev, blackhole_netdev,
 					  &rt->dst.dev_tracker,
 					  GFP_ATOMIC);
+			list_move(&rt->rt_uncached, &ul->quarantine);
 		}
 		spin_unlock_bh(&ul->lock);
 	}
@@ -2258,6 +2276,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/*
 	 *	Now we are ready to route packet.
 	 */
+	fl4.flowi4_l3mdev = 0;
 	fl4.flowi4_oif = 0;
 	fl4.flowi4_iif = dev->ifindex;
 	fl4.flowi4_mark = skb->mark;
@@ -2603,7 +2622,6 @@ add:
 struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 					const struct sk_buff *skb)
 {
-	__u8 tos = RT_FL_TOS(fl4);
 	struct fib_result res = {
 		.type		= RTN_UNSPEC,
 		.fi		= NULL,
@@ -2613,9 +2631,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 	struct rtable *rth;
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
-	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
-	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
-			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+	ip_rt_fix_tos(fl4);
 	rcu_read_lock();
 	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -2733,8 +2749,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		res->fi = NULL;
 		res->table = NULL;
 		if (fl4->flowi4_oif &&
-		    (ipv4_is_multicast(fl4->daddr) ||
-		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
+		    (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
 			/* Apparently, routing tables are wrong. Assume,
 			 * that the destination is on link.
 			 *
@@ -3392,7 +3407,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 				if (fa->fa_slen == slen &&
 				    fa->tb_id == fri.tb_id &&
-				    fa->fa_tos == fri.tos &&
+				    fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) &&
 				    fa->fa_info == res.fi &&
 				    fa->fa_type == fri.type) {
 					fri.offload = READ_ONCE(fa->offload);
@@ -3535,13 +3550,6 @@ static struct ctl_table ipv4_route_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "min_adv_mss",
-		.data		= &ip_rt_min_advmss,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{ }
 };
@@ -3569,6 +3577,13 @@ static struct ctl_table ipv4_route_netns_table[] = {
 		.mode           = 0644,
 		.proc_handler   = proc_dointvec_jiffies,
 	},
+	{
+		.procname   = "min_adv_mss",
+		.data       = &init_net.ipv4.ip_rt_min_advmss,
+		.maxlen     = sizeof(int),
+		.mode       = 0644,
+		.proc_handler   = proc_dointvec,
+	},
 	{ },
 };
@@ -3631,6 +3646,7 @@ static __net_init int netns_ip_rt_init(struct net *net)
 	/* Set default value for namespaceified sysctls */
 	net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
 	net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+	net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
 	return 0;
 }
@@ -3705,6 +3721,7 @@ int __init ip_rt_init(void)
 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
 		INIT_LIST_HEAD(&ul->head);
+		INIT_LIST_HEAD(&ul->quarantine);
 		spin_lock_init(&ul->lock);
 	}
#ifdef CONFIG_IP_ROUTE_CLASSID
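Note on the new ip_rt_fix_tos() helper seen above: the diff factors the tos/scope normalisation out of ip_route_output_key_hash() and also applies it in the redirect and PMTU paths (ip_do_redirect(), ip_rt_update_pmtu(), ipv4_sk_update_pmtu()), so that an RTO_ONLINK bit in the flow's tos is turned into RT_SCOPE_LINK before the lookup. The standalone sketch below only illustrates that mapping; the struct, the constant values and main() are assumptions added to keep it compilable outside the kernel, and the kernel headers remain authoritative.

/* Standalone sketch of the tos/scope normalisation that ip_rt_fix_tos()
 * centralises.  The constants mirror the usual kernel definitions
 * (IPTOS_RT_MASK = 0x1C, RTO_ONLINK = 0x01, scope values from rtnetlink);
 * treat them as assumptions of this sketch.
 */
#include <stdio.h>
#include <stdint.h>

#define IPTOS_RT_MASK	0x1C	/* TOS bits that matter for routing */
#define RTO_ONLINK	0x01	/* "destination is on-link" flag */

enum scope { RT_SCOPE_UNIVERSE = 0, RT_SCOPE_LINK = 253 };

struct fake_flowi4 {		/* only the two fields the helper touches */
	uint8_t	flowi4_tos;
	uint8_t	flowi4_scope;
};

static void fix_tos(struct fake_flowi4 *fl4)
{
	/* RT_FL_TOS(): keep only the routing TOS bits plus the on-link flag */
	uint8_t tos = fl4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK);

	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = (tos & RTO_ONLINK) ? RT_SCOPE_LINK
					       : RT_SCOPE_UNIVERSE;
}

int main(void)
{
	struct fake_flowi4 fl4 = { .flowi4_tos = 0x11 };	/* low bit set => on-link */

	fix_tos(&fl4);
	printf("tos=0x%02x scope=%u\n", fl4.flowi4_tos, fl4.flowi4_scope);
	return 0;
}

Compiled with any C compiler, this prints tos=0x10 scope=253 for an input of 0x11: the RTO_ONLINK bit is consumed and expressed as link scope, which is exactly what the centralised helper now guarantees for every caller in the diff.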

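Note on the min_adv_mss conversion: the sysctl entry moves from the global ipv4_route_table to ipv4_route_netns_table, ipv4_default_advmss() reads net->ipv4.ip_rt_min_advmss instead of the removed ip_rt_min_advmss global, and netns_ip_rt_init() seeds each namespace with DEFAULT_MIN_ADVMSS (256). The sketch below merely reproduces the clamp arithmetic in plain C so the effect of the per-netns knob is easy to see; the hard-coded header sizes and the IPV4_MAX_PMTU value are assumptions matching the usual IPv4/TCP definitions, not kernel code.

/* Sketch of the clamp performed by ipv4_default_advmss(): the advertised
 * MSS is the MTU minus IP+TCP headers, bounded below by the (now per-netns)
 * min_adv_mss and above by IPV4_MAX_PMTU minus the same headers.
 */
#include <stdio.h>

static unsigned int default_advmss(unsigned int mtu, unsigned int min_advmss)
{
	const unsigned int header_size = 20 + 20;	/* sizeof(tcphdr) + sizeof(iphdr) */
	const unsigned int max_pmtu = 65535;		/* IPV4_MAX_PMTU */
	unsigned int advmss = mtu - header_size;

	if (advmss < min_advmss)
		advmss = min_advmss;			/* max_t(..., min_advmss) */
	return advmss < max_pmtu - header_size ? advmss
					       : max_pmtu - header_size;
}

int main(void)
{
	/* 1500-byte MTU with the default per-netns minimum of 256 */
	printf("advmss = %u\n", default_advmss(1500, 256));
	return 0;
}

For a 1500-byte MTU and the default minimum of 256 this prints advmss = 1460; the lower bound only matters on very small-MTU devices, but after this change it can be tuned per network namespace via the net.ipv4.route.min_adv_mss sysctl.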