diff options
Diffstat (limited to 'net/ipv4/route.c')
| -rw-r--r-- | net/ipv4/route.c | 100 | 
1 files changed, 62 insertions, 38 deletions
| diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0383e66f59bc..43b69af242e1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs)  {  	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;  	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; -	u32 old = ACCESS_ONCE(*p_tstamp); +	u32 old = READ_ONCE(*p_tstamp);  	u32 now = (u32)jiffies;  	u32 new, delta = 0; @@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,  	struct fnhe_hash_bucket *hash;  	struct fib_nh_exception *fnhe;  	struct rtable *rt; +	u32 genid, hval;  	unsigned int i;  	int depth; -	u32 hval = fnhe_hashfun(daddr); + +	genid = fnhe_genid(dev_net(nh->nh_dev)); +	hval = fnhe_hashfun(daddr);  	spin_lock_bh(&fnhe_lock); @@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,  	}  	if (fnhe) { +		if (fnhe->fnhe_genid != genid) +			fnhe->fnhe_genid = genid;  		if (gw)  			fnhe->fnhe_gw = gw; -		if (pmtu) { +		if (pmtu)  			fnhe->fnhe_pmtu = pmtu; -			fnhe->fnhe_expires = max(1UL, expires); -		} +		fnhe->fnhe_expires = max(1UL, expires);  		/* Update all cached dsts too */  		rt = rcu_dereference(fnhe->fnhe_rth_input);  		if (rt) @@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,  			fnhe->fnhe_next = hash->chain;  			rcu_assign_pointer(hash->chain, fnhe);  		} -		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); +		fnhe->fnhe_genid = genid;  		fnhe->fnhe_daddr = daddr;  		fnhe->fnhe_gw = gw;  		fnhe->fnhe_pmtu = pmtu; @@ -1250,7 +1254,7 @@ static void set_class_tag(struct rtable *rt, u32 tag)  static unsigned int ipv4_default_advmss(const struct dst_entry *dst)  {  	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); -	unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size, +	unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,  				    ip_rt_min_advmss);  	return min(advmss, IPV4_MAX_PMTU - header_size); @@ -1267,7 +1271,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)  	if (mtu)  		return mtu; -	mtu = dst->dev->mtu; +	mtu = READ_ONCE(dst->dev->mtu);  	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {  		if (rt->rt_uses_gateway && mtu > 576) @@ -1398,7 +1402,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)  	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);  	struct rtable *rt = (struct rtable *) dst; -	if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) +	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))  		kfree(p);  	if (!list_empty(&rt->rt_uncached)) { @@ -1456,7 +1460,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,  		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);  		if (fi->fib_metrics != &dst_default_metrics) {  			rt->dst._metrics |= DST_METRICS_REFCOUNTED; -			atomic_inc(&fi->fib_metrics->refcnt); +			refcount_inc(&fi->fib_metrics->refcnt);  		}  #ifdef CONFIG_IP_ROUTE_CLASSID  		rt->dst.tclassid = nh->nh_tclassid; @@ -1520,43 +1524,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,  EXPORT_SYMBOL(rt_dst_alloc);  /* called in rcu_read_lock() section */ -static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, -				u8 tos, struct net_device *dev, int our) +int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, +			  u8 tos, struct net_device *dev, +			  struct in_device *in_dev, u32 *itag)  { -	struct rtable *rth; -	struct in_device *in_dev = __in_dev_get_rcu(dev); -	unsigned int flags = RTCF_MULTICAST; -	u32 itag = 0;  	int err;  	/* Primary sanity checks. */ -  	if (!in_dev)  		return -EINVAL;  	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||  	    skb->protocol != htons(ETH_P_IP)) -		goto e_inval; +		return -EINVAL;  	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) -		goto e_inval; +		return -EINVAL;  	if (ipv4_is_zeronet(saddr)) {  		if (!ipv4_is_local_multicast(daddr)) -			goto e_inval; +			return -EINVAL;  	} else {  		err = fib_validate_source(skb, saddr, 0, tos, 0, dev, -					  in_dev, &itag); +					  in_dev, itag);  		if (err < 0) -			goto e_err; +			return err;  	} +	return 0; +} + +/* called in rcu_read_lock() section */ +static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, +			     u8 tos, struct net_device *dev, int our) +{ +	struct in_device *in_dev = __in_dev_get_rcu(dev); +	unsigned int flags = RTCF_MULTICAST; +	struct rtable *rth; +	u32 itag = 0; +	int err; + +	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); +	if (err) +		return err; +  	if (our)  		flags |= RTCF_LOCAL;  	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,  			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);  	if (!rth) -		goto e_nobufs; +		return -ENOBUFS;  #ifdef CONFIG_IP_ROUTE_CLASSID  	rth->dst.tclassid = itag; @@ -1572,13 +1589,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,  	skb_dst_set(skb, &rth->dst);  	return 0; - -e_nobufs: -	return -ENOBUFS; -e_inval: -	return -EINVAL; -e_err: -	return err;  } @@ -2236,7 +2246,7 @@ add:  	if (!rth)  		return ERR_PTR(-ENOBUFS); -	rth->rt_iif	= orig_oif ? : 0; +	rth->rt_iif = orig_oif;  	if (res->table)  		rth->rt_table_id = res->table->tb_id; @@ -2439,6 +2449,12 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,  		/* L3 master device is the loopback for that domain */  		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :  			net->loopback_dev; + +		/* make sure orig_oif points to fib result device even +		 * though packet rx/tx happens over loopback or l3mdev +		 */ +		orig_oif = FIB_RES_OIF(*res); +  		fl4->flowi4_oif = dev_out->ifindex;  		flags |= RTCF_LOCAL;  		goto make_route; @@ -2501,7 +2517,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or  	struct rtable *ort = (struct rtable *) dst_orig;  	struct rtable *rt; -	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); +	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);  	if (rt) {  		struct dst_entry *new = &rt->dst; @@ -2750,26 +2766,34 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,  		err = 0;  		if (IS_ERR(rt))  			err = PTR_ERR(rt); +		else +			skb_dst_set(skb, &rt->dst);  	}  	if (err)  		goto errout_free; -	skb_dst_set(skb, &rt->dst);  	if (rtm->rtm_flags & RTM_F_NOTIFY)  		rt->rt_flags |= RTCF_NOTIFY;  	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)  		table_id = rt->rt_table_id; -	if (rtm->rtm_flags & RTM_F_FIB_MATCH) +	if (rtm->rtm_flags & RTM_F_FIB_MATCH) { +		if (!res.fi) { +			err = fib_props[res.type].error; +			if (!err) +				err = -EHOSTUNREACH; +			goto errout_free; +		}  		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,  				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,  				    rt->rt_type, res.prefix, res.prefixlen,  				    fl4.flowi4_tos, res.fi, 0); -	else +	} else {  		err = rt_fill_info(net, dst, src, table_id, &fl4, skb,  				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); +	}  	if (err < 0)  		goto errout_free; @@ -3018,7 +3042,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;  int __init ip_rt_init(void)  { -	int rc = 0;  	int cpu;  	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); @@ -3067,14 +3090,15 @@ int __init ip_rt_init(void)  	xfrm_init();  	xfrm4_init();  #endif -	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); +	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, +		      RTNL_FLAG_DOIT_UNLOCKED);  #ifdef CONFIG_SYSCTL  	register_pernet_subsys(&sysctl_route_ops);  #endif  	register_pernet_subsys(&rt_genid_ops);  	register_pernet_subsys(&ipv4_inetpeer_ops); -	return rc; +	return 0;  }  #ifdef CONFIG_SYSCTL | 
