diff options
Diffstat (limited to 'drivers/net/vxlan.c')
| -rw-r--r-- | drivers/net/vxlan.c | 511 | 
1 files changed, 318 insertions, 193 deletions
| diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 328b4712683c..96aa7e6cf214 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -59,6 +59,8 @@ static const u8 all_zeros_mac[ETH_ALEN + 2];  static int vxlan_sock_add(struct vxlan_dev *vxlan); +static void vxlan_vs_del_dev(struct vxlan_dev *vxlan); +  /* per-network namespace private data for this module */  struct vxlan_net {  	struct list_head  vxlan_list; @@ -224,26 +226,37 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,  	return NULL;  } -static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni) +static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex, +					   __be32 vni)  { -	struct vxlan_dev *vxlan; +	struct vxlan_dev_node *node;  	/* For flow based devices, map all packets to VNI 0 */  	if (vs->flags & VXLAN_F_COLLECT_METADATA)  		vni = 0; -	hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) { -		if (vxlan->default_dst.remote_vni == vni) -			return vxlan; +	hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) { +		if (node->vxlan->default_dst.remote_vni != vni) +			continue; + +		if (IS_ENABLED(CONFIG_IPV6)) { +			const struct vxlan_config *cfg = &node->vxlan->cfg; + +			if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) && +			    cfg->remote_ifindex != ifindex) +				continue; +		} + +		return node->vxlan;  	}  	return NULL;  }  /* Look up VNI in a per net namespace table */ -static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni, -					sa_family_t family, __be16 port, -					u32 flags) +static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex, +					__be32 vni, sa_family_t family, +					__be16 port, u32 flags)  {  	struct vxlan_sock *vs; @@ -251,7 +264,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni,  	if (!vs)  		return NULL; -	return vxlan_vs_find_vni(vs, vni); +	return vxlan_vs_find_vni(vs, ifindex, vni);  }  /* Fill in neighbour message in skbuff. */ @@ -303,7 +316,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,  	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&  	    nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))  		goto nla_put_failure; -	if ((vxlan->flags & VXLAN_F_COLLECT_METADATA) && fdb->vni && +	if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&  	    nla_put_u32(skb, NDA_SRC_VNI,  			be32_to_cpu(fdb->vni)))  		goto nla_put_failure; @@ -417,7 +430,7 @@ static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)  static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,  						const u8 *mac, __be32 vni)  { -	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) +	if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)  		return &vxlan->fdb_head[eth_vni_hash(mac, vni)];  	else  		return &vxlan->fdb_head[eth_hash(mac)]; @@ -432,7 +445,7 @@ static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,  	hlist_for_each_entry_rcu(f, head, hlist) {  		if (ether_addr_equal(mac, f->eth_addr)) { -			if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { +			if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {  				if (vni == f->vni)  					return f;  			} else { @@ -740,6 +753,22 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)  	call_rcu(&f->rcu, vxlan_fdb_free);  } +static void vxlan_dst_free(struct rcu_head *head) +{ +	struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu); + +	dst_cache_destroy(&rd->dst_cache); +	kfree(rd); +} + +static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, +				  struct vxlan_rdst *rd) +{ +	list_del_rcu(&rd->list); +	vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH); +	call_rcu(&rd->rcu, vxlan_dst_free); +} +  static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,  			   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,  			   __be32 *vni, u32 *ifindex) @@ -864,9 +893,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,  	 * otherwise destroy the fdb entry  	 */  	if (rd && !list_is_singular(&f->remotes)) { -		list_del_rcu(&rd->list); -		vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH); -		kfree_rcu(rd, rcu); +		vxlan_fdb_dst_destroy(vxlan, f, rd);  		goto out;  	} @@ -941,20 +968,28 @@ out:   */  static bool vxlan_snoop(struct net_device *dev,  			union vxlan_addr *src_ip, const u8 *src_mac, -			__be32 vni) +			u32 src_ifindex, __be32 vni)  {  	struct vxlan_dev *vxlan = netdev_priv(dev);  	struct vxlan_fdb *f; +	u32 ifindex = 0; + +#if IS_ENABLED(CONFIG_IPV6) +	if (src_ip->sa.sa_family == AF_INET6 && +	    (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)) +		ifindex = src_ifindex; +#endif  	f = vxlan_find_mac(vxlan, src_mac, vni);  	if (likely(f)) {  		struct vxlan_rdst *rdst = first_remote_rcu(f); -		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip))) +		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && +			   rdst->remote_ifindex == ifindex))  			return false;  		/* Don't migrate static entries, drop packets */ -		if (f->state & NUD_NOARP) +		if (f->state & (NUD_PERMANENT | NUD_NOARP))  			return true;  		if (net_ratelimit()) @@ -977,7 +1012,7 @@ static bool vxlan_snoop(struct net_device *dev,  					 vxlan->cfg.dst_port,  					 vni,  					 vxlan->default_dst.remote_vni, -					 0, NTF_SELF); +					 ifindex, NTF_SELF);  		spin_unlock(&vxlan->hash_lock);  	} @@ -999,11 +1034,11 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)  	/* The vxlan_sock is only used by dev, leaving group has  	 * no effect on other vxlan devices.  	 */ -	if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1) +	if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1)  		return false;  #if IS_ENABLED(CONFIG_IPV6)  	sock6 = rtnl_dereference(dev->vn6_sock); -	if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1) +	if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1)  		return false;  #endif @@ -1040,7 +1075,7 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)  	if (!vs)  		return false; -	if (!atomic_dec_and_test(&vs->refcnt)) +	if (!refcount_dec_and_test(&vs->refcnt))  		return false;  	vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id); @@ -1061,12 +1096,14 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)  #if IS_ENABLED(CONFIG_IPV6)  	struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); -	rcu_assign_pointer(vxlan->vn6_sock, NULL); +	RCU_INIT_POINTER(vxlan->vn6_sock, NULL);  #endif -	rcu_assign_pointer(vxlan->vn4_sock, NULL); +	RCU_INIT_POINTER(vxlan->vn4_sock, NULL);  	synchronize_net(); +	vxlan_vs_del_dev(vxlan); +  	if (__vxlan_sock_release_prep(sock4)) {  		udp_tunnel_sock_release(sock4->sock);  		kfree(sock4); @@ -1246,6 +1283,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,  			  struct sk_buff *skb, __be32 vni)  {  	union vxlan_addr saddr; +	u32 ifindex = skb->dev->ifindex;  	skb_reset_mac_header(skb);  	skb->protocol = eth_type_trans(skb, vxlan->dev); @@ -1266,8 +1304,8 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,  #endif  	} -	if ((vxlan->flags & VXLAN_F_LEARN) && -	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, vni)) +	if ((vxlan->cfg.flags & VXLAN_F_LEARN) && +	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))  		return false;  	return true; @@ -1333,7 +1371,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)  	vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); -	vxlan = vxlan_vs_find_vni(vs, vni); +	vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);  	if (!vxlan)  		goto drop; @@ -1489,7 +1527,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)  		if (netif_rx_ni(reply) == NET_RX_DROP)  			dev->stats.rx_dropped++; -	} else if (vxlan->flags & VXLAN_F_L3MISS) { +	} else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {  		union vxlan_addr ipa = {  			.sin.sin_addr.s_addr = tip,  			.sin.sin_family = AF_INET, @@ -1566,10 +1604,8 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,  	skb_pull(reply, sizeof(struct ipv6hdr));  	skb_reset_transport_header(reply); -	na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen); -  	/* Neighbor Advertisement */ -	memset(na, 0, sizeof(*na)+na_olen); +	na = skb_put_zero(reply, sizeof(*na) + na_olen);  	na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;  	na->icmph.icmp6_router = isrouter;  	na->icmph.icmp6_override = 1; @@ -1649,7 +1685,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)  		if (netif_rx_ni(reply) == NET_RX_DROP)  			dev->stats.rx_dropped++; -	} else if (vxlan->flags & VXLAN_F_L3MISS) { +	} else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {  		union vxlan_addr ipa = {  			.sin6.sin6_addr = msg->target,  			.sin6.sin6_family = AF_INET6, @@ -1682,7 +1718,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)  			return false;  		pip = ip_hdr(skb);  		n = neigh_lookup(&arp_tbl, &pip->daddr, dev); -		if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { +		if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {  			union vxlan_addr ipa = {  				.sin.sin_addr.s_addr = pip->daddr,  				.sin.sin_family = AF_INET, @@ -1703,7 +1739,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)  			return false;  		pip6 = ipv6_hdr(skb);  		n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev); -		if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { +		if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {  			union vxlan_addr ipa = {  				.sin6.sin6_addr = pip6->daddr,  				.sin6.sin6_family = AF_INET6, @@ -1811,7 +1847,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,  	if (err)  		return err; -	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); +	vxh = __skb_push(skb, sizeof(*vxh));  	vxh->vx_flags = VXLAN_HF_VNI;  	vxh->vx_vni = vxlan_vni_field(vni); @@ -1977,8 +2013,9 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,  #endif  	} -	if (dst_vxlan->flags & VXLAN_F_LEARN) -		vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, vni); +	if (dst_vxlan->cfg.flags & VXLAN_F_LEARN) +		vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0, +			    vni);  	u64_stats_update_begin(&tx_stats->syncp);  	tx_stats->tx_packets++; @@ -1996,8 +2033,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,  }  static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, -				 struct vxlan_dev *vxlan, union vxlan_addr *daddr, -				 __be16 dst_port, __be32 vni, struct dst_entry *dst, +				 struct vxlan_dev *vxlan, +				 union vxlan_addr *daddr, +				 __be16 dst_port, int dst_ifindex, __be32 vni, +				 struct dst_entry *dst,  				 u32 rt_flags)  {  #if IS_ENABLED(CONFIG_IPV6) @@ -2013,9 +2052,9 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,  		struct vxlan_dev *dst_vxlan;  		dst_release(dst); -		dst_vxlan = vxlan_find_vni(vxlan->net, vni, +		dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,  					   daddr->sa.sa_family, dst_port, -					   vxlan->flags); +					   vxlan->cfg.flags);  		if (!dst_vxlan) {  			dev->stats.tx_errors++;  			kfree_skb(skb); @@ -2045,8 +2084,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  	struct dst_entry *ndst = NULL;  	__be32 vni, label;  	__u8 tos, ttl; +	int ifindex;  	int err; -	u32 flags = vxlan->flags; +	u32 flags = vxlan->cfg.flags;  	bool udp_sum = false;  	bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev)); @@ -2065,6 +2105,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  		dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;  		vni = (rdst->remote_vni) ? : default_vni; +		ifindex = rdst->remote_ifindex;  		local_ip = vxlan->cfg.saddr;  		dst_cache = &rdst->dst_cache;  		md->gbp = skb->mark; @@ -2098,6 +2139,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  		dst = &remote_ip;  		dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;  		vni = tunnel_id_to_key32(info->key.tun_id); +		ifindex = 0;  		dst_cache = &info->dst_cache;  		if (info->options_len)  			md = ip_tunnel_info_opts(info); @@ -2115,8 +2157,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  		struct rtable *rt;  		__be16 df = 0; -		rt = vxlan_get_route(vxlan, dev, sock4, skb, -				     rdst ? rdst->remote_ifindex : 0, tos, +		rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,  				     dst->sin.sin_addr.s_addr,  				     &local_ip.sin.sin_addr.s_addr,  				     dst_port, src_port, @@ -2129,8 +2170,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  		/* Bypass encapsulation if the destination is local */  		if (!info) {  			err = encap_bypass_if_local(skb, dev, vxlan, dst, -						    dst_port, vni, &rt->dst, -						    rt->rt_flags); +						    dst_port, ifindex, vni, +						    &rt->dst, rt->rt_flags);  			if (err)  				goto out_unlock;  		} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { @@ -2152,8 +2193,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  	} else {  		struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); -		ndst = vxlan6_get_route(vxlan, dev, sock6, skb, -					rdst ? rdst->remote_ifindex : 0, tos, +		ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,  					label, &dst->sin6.sin6_addr,  					&local_ip.sin6.sin6_addr,  					dst_port, src_port, @@ -2168,8 +2208,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,  			u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;  			err = encap_bypass_if_local(skb, dev, vxlan, dst, -						    dst_port, vni, ndst, -						    rt6i_flags); +						    dst_port, ifindex, vni, +						    ndst, rt6i_flags);  			if (err)  				goto out_unlock;  		} @@ -2228,7 +2268,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)  	skb_reset_mac_header(skb); -	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { +	if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {  		if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&  		    info->mode & IP_TUNNEL_INFO_TX) {  			vni = tunnel_id_to_key32(info->key.tun_id); @@ -2241,7 +2281,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)  		}  	} -	if (vxlan->flags & VXLAN_F_PROXY) { +	if (vxlan->cfg.flags & VXLAN_F_PROXY) {  		eth = eth_hdr(skb);  		if (ntohs(eth->h_proto) == ETH_P_ARP)  			return arp_reduce(dev, skb, vni); @@ -2261,7 +2301,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)  	f = vxlan_find_mac(vxlan, eth->h_dest, vni);  	did_rsc = false; -	if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) && +	if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) &&  	    (ntohs(eth->h_proto) == ETH_P_IP ||  	     ntohs(eth->h_proto) == ETH_P_IPV6)) {  		did_rsc = route_shortcircuit(dev, skb); @@ -2272,7 +2312,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)  	if (f == NULL) {  		f = vxlan_find_mac(vxlan, all_zeros_mac, vni);  		if (f == NULL) { -			if ((vxlan->flags & VXLAN_F_L2MISS) && +			if ((vxlan->cfg.flags & VXLAN_F_L2MISS) &&  			    !is_multicast_ether_addr(eth->h_dest))  				vxlan_fdb_miss(vxlan, eth->h_dest); @@ -2342,13 +2382,27 @@ static void vxlan_cleanup(unsigned long arg)  	mod_timer(&vxlan->age_timer, next_timer);  } -static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) +static void vxlan_vs_del_dev(struct vxlan_dev *vxlan) +{ +	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); + +	spin_lock(&vn->sock_lock); +	hlist_del_init_rcu(&vxlan->hlist4.hlist); +#if IS_ENABLED(CONFIG_IPV6) +	hlist_del_init_rcu(&vxlan->hlist6.hlist); +#endif +	spin_unlock(&vn->sock_lock); +} + +static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan, +			     struct vxlan_dev_node *node)  {  	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);  	__be32 vni = vxlan->default_dst.remote_vni; +	node->vxlan = vxlan;  	spin_lock(&vn->sock_lock); -	hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); +	hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));  	spin_unlock(&vn->sock_lock);  } @@ -2459,10 +2513,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)  	struct vxlan_rdst *dst = &vxlan->default_dst;  	struct net_device *lowerdev = __dev_get_by_index(vxlan->net,  							 dst->remote_ifindex); -	bool use_ipv6 = false; - -	if (dst->remote_ip.sa.sa_family == AF_INET6) -		use_ipv6 = true; +	bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);  	/* This check is different than dev->max_mtu, because it looks at  	 * the lowerdev->mtu, rather than the static dev->max_mtu @@ -2584,7 +2635,7 @@ static void vxlan_setup(struct net_device *dev)  	eth_hw_addr_random(dev);  	ether_setup(dev); -	dev->destructor = free_netdev; +	dev->needs_free_netdev = true;  	SET_NETDEV_DEVTYPE(dev, &vxlan_type);  	dev->features	|= NETIF_F_LLTX; @@ -2598,6 +2649,10 @@ static void vxlan_setup(struct net_device *dev)  	netif_keep_dst(dev);  	dev->priv_flags |= IFF_NO_QUEUE; +	/* MTU range: 68 - 65535 */ +	dev->min_mtu = ETH_MIN_MTU; +	dev->max_mtu = ETH_MAX_MTU; +  	INIT_LIST_HEAD(&vxlan->next);  	spin_lock_init(&vxlan->hash_lock); @@ -2605,8 +2660,6 @@ static void vxlan_setup(struct net_device *dev)  	vxlan->age_timer.function = vxlan_cleanup;  	vxlan->age_timer.data = (unsigned long) vxlan; -	vxlan->cfg.dst_port = htons(vxlan_port); -  	vxlan->dev = dev;  	gro_cells_init(&vxlan->gro_cells, dev); @@ -2662,7 +2715,8 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {  	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },  }; -static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) +static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], +			  struct netlink_ext_ack *extack)  {  	if (tb[IFLA_ADDRESS]) {  		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { @@ -2676,11 +2730,19 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])  		}  	} +	if (tb[IFLA_MTU]) { +		u32 mtu = nla_get_u32(tb[IFLA_MTU]); + +		if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) +			return -EINVAL; +	} +  	if (!data)  		return -EINVAL;  	if (data[IFLA_VXLAN_ID]) { -		__u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); +		u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); +  		if (id >= VXLAN_N_VID)  			return -ERANGE;  	} @@ -2763,7 +2825,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,  	}  	vs->sock = sock; -	atomic_set(&vs->refcnt, 1); +	refcount_set(&vs->refcnt, 1);  	vs->flags = (flags & VXLAN_F_RCV_FLAGS);  	spin_lock(&vn->sock_lock); @@ -2792,12 +2854,13 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)  {  	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);  	struct vxlan_sock *vs = NULL; +	struct vxlan_dev_node *node;  	if (!vxlan->cfg.no_share) {  		spin_lock(&vn->sock_lock);  		vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, -				     vxlan->cfg.dst_port, vxlan->flags); -		if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) { +				     vxlan->cfg.dst_port, vxlan->cfg.flags); +		if (vs && !refcount_inc_not_zero(&vs->refcnt)) {  			spin_unlock(&vn->sock_lock);  			return -EBUSY;  		} @@ -2805,23 +2868,27 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)  	}  	if (!vs)  		vs = vxlan_socket_create(vxlan->net, ipv6, -					 vxlan->cfg.dst_port, vxlan->flags); +					 vxlan->cfg.dst_port, vxlan->cfg.flags);  	if (IS_ERR(vs))  		return PTR_ERR(vs);  #if IS_ENABLED(CONFIG_IPV6) -	if (ipv6) +	if (ipv6) {  		rcu_assign_pointer(vxlan->vn6_sock, vs); -	else +		node = &vxlan->hlist6; +	} else  #endif +	{  		rcu_assign_pointer(vxlan->vn4_sock, vs); -	vxlan_vs_add_dev(vs, vxlan); +		node = &vxlan->hlist4; +	} +	vxlan_vs_add_dev(vs, vxlan, node);  	return 0;  }  static int vxlan_sock_add(struct vxlan_dev *vxlan)  { -	bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA; -	bool ipv6 = vxlan->flags & VXLAN_F_IPV6 || metadata; +	bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA; +	bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata;  	bool ipv4 = !ipv6 || metadata;  	int ret = 0; @@ -2841,116 +2908,176 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)  	return ret;  } -static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, -			       struct vxlan_config *conf, -			       bool changelink) +static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, +				 struct net_device **lower, +				 struct vxlan_dev *old)  {  	struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); -	struct vxlan_dev *vxlan = netdev_priv(dev), *tmp; -	struct vxlan_rdst *dst = &vxlan->default_dst; -	unsigned short needed_headroom = ETH_HLEN; +	struct vxlan_dev *tmp;  	bool use_ipv6 = false; -	__be16 default_port = vxlan->cfg.dst_port; -	struct net_device *lowerdev = NULL; -	if (!changelink) { -		if (conf->flags & VXLAN_F_GPE) { -			/* For now, allow GPE only together with -			 * COLLECT_METADATA. This can be relaxed later; in such -			 * case, the other side of the PtP link will have to be -			 * provided. -			 */ -			if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) || -			    !(conf->flags & VXLAN_F_COLLECT_METADATA)) { -				pr_info("unsupported combination of extensions\n"); -				return -EINVAL; -			} -			vxlan_raw_setup(dev); -		} else { -			vxlan_ether_setup(dev); +	if (conf->flags & VXLAN_F_GPE) { +		/* For now, allow GPE only together with +		 * COLLECT_METADATA. This can be relaxed later; in such +		 * case, the other side of the PtP link will have to be +		 * provided. +		 */ +		if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) || +		    !(conf->flags & VXLAN_F_COLLECT_METADATA)) { +			return -EINVAL;  		} - -		/* MTU range: 68 - 65535 */ -		dev->min_mtu = ETH_MIN_MTU; -		dev->max_mtu = ETH_MAX_MTU; -		vxlan->net = src_net;  	} -	dst->remote_vni = conf->vni; +	if (!conf->remote_ip.sa.sa_family && !conf->saddr.sa.sa_family) { +		/* Unless IPv6 is explicitly requested, assume IPv4 */ +		conf->remote_ip.sa.sa_family = AF_INET; +		conf->saddr.sa.sa_family = AF_INET; +	} else if (!conf->remote_ip.sa.sa_family) { +		conf->remote_ip.sa.sa_family = conf->saddr.sa.sa_family; +	} else if (!conf->saddr.sa.sa_family) { +		conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family; +	} -	memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip)); +	if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family) +		return -EINVAL; -	/* Unless IPv6 is explicitly requested, assume IPv4 */ -	if (!dst->remote_ip.sa.sa_family) -		dst->remote_ip.sa.sa_family = AF_INET; +	if (vxlan_addr_multicast(&conf->saddr)) +		return -EINVAL; -	if (dst->remote_ip.sa.sa_family == AF_INET6 || -	    vxlan->cfg.saddr.sa.sa_family == AF_INET6) { +	if (conf->saddr.sa.sa_family == AF_INET6) {  		if (!IS_ENABLED(CONFIG_IPV6))  			return -EPFNOSUPPORT;  		use_ipv6 = true; -		vxlan->flags |= VXLAN_F_IPV6; +		conf->flags |= VXLAN_F_IPV6; + +		if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) { +			int local_type = +				ipv6_addr_type(&conf->saddr.sin6.sin6_addr); +			int remote_type = +				ipv6_addr_type(&conf->remote_ip.sin6.sin6_addr); + +			if (local_type & IPV6_ADDR_LINKLOCAL) { +				if (!(remote_type & IPV6_ADDR_LINKLOCAL) && +				    (remote_type != IPV6_ADDR_ANY)) +					return -EINVAL; + +				conf->flags |= VXLAN_F_IPV6_LINKLOCAL; +			} else { +				if (remote_type == +				    (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL)) +					return -EINVAL; + +				conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL; +			} +		}  	} -	if (conf->label && !use_ipv6) { -		pr_info("label only supported in use with IPv6\n"); +	if (conf->label && !use_ipv6)  		return -EINVAL; -	} -	if (conf->remote_ifindex && -	    conf->remote_ifindex != vxlan->cfg.remote_ifindex) { -		lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); -		dst->remote_ifindex = conf->remote_ifindex; +	if (conf->remote_ifindex) { +		struct net_device *lowerdev; -		if (!lowerdev) { -			pr_info("ifindex %d does not exist\n", -				dst->remote_ifindex); +		lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); +		if (!lowerdev)  			return -ENODEV; -		}  #if IS_ENABLED(CONFIG_IPV6)  		if (use_ipv6) {  			struct inet6_dev *idev = __in6_dev_get(lowerdev); -			if (idev && idev->cnf.disable_ipv6) { -				pr_info("IPv6 is disabled via sysctl\n"); +			if (idev && idev->cnf.disable_ipv6)  				return -EPERM; -			}  		}  #endif -		if (!conf->mtu) -			dev->mtu = lowerdev->mtu - -				   (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); +		*lower = lowerdev; +	} else { +		if (vxlan_addr_multicast(&conf->remote_ip)) +			return -EINVAL; -		needed_headroom = lowerdev->hard_header_len; -	} else if (!conf->remote_ifindex && -		   vxlan_addr_multicast(&dst->remote_ip)) { -		pr_info("multicast destination requires interface to be specified\n"); -		return -EINVAL; +#if IS_ENABLED(CONFIG_IPV6) +		if (conf->flags & VXLAN_F_IPV6_LINKLOCAL) +			return -EINVAL; +#endif + +		*lower = NULL;  	} -	if (lowerdev) { -		dev->gso_max_size = lowerdev->gso_max_size; -		dev->gso_max_segs = lowerdev->gso_max_segs; +	if (!conf->dst_port) { +		if (conf->flags & VXLAN_F_GPE) +			conf->dst_port = htons(4790); /* IANA VXLAN-GPE port */ +		else +			conf->dst_port = htons(vxlan_port);  	} -	if (conf->mtu) { -		int max_mtu = ETH_MAX_MTU; +	if (!conf->age_interval) +		conf->age_interval = FDB_AGE_DEFAULT; -		if (lowerdev) -			max_mtu = lowerdev->mtu; +	list_for_each_entry(tmp, &vn->vxlan_list, next) { +		if (tmp == old) +			continue; -		max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); +		if (tmp->cfg.vni != conf->vni) +			continue; +		if (tmp->cfg.dst_port != conf->dst_port) +			continue; +		if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) != +		    (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6))) +			continue; -		if (conf->mtu < dev->min_mtu || conf->mtu > dev->max_mtu) -			return -EINVAL; +		if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) && +		    tmp->cfg.remote_ifindex != conf->remote_ifindex) +			continue; + +		return -EEXIST; +	} + +	return 0; +} -		dev->mtu = conf->mtu; +static void vxlan_config_apply(struct net_device *dev, +			       struct vxlan_config *conf, +			       struct net_device *lowerdev, +			       struct net *src_net, +			       bool changelink) +{ +	struct vxlan_dev *vxlan = netdev_priv(dev); +	struct vxlan_rdst *dst = &vxlan->default_dst; +	unsigned short needed_headroom = ETH_HLEN; +	bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6); +	int max_mtu = ETH_MAX_MTU; -		if (conf->mtu > max_mtu) -			dev->mtu = max_mtu; +	if (!changelink) { +		if (conf->flags & VXLAN_F_GPE) +			vxlan_raw_setup(dev); +		else +			vxlan_ether_setup(dev); + +		if (conf->mtu) +			dev->mtu = conf->mtu; + +		vxlan->net = src_net;  	} +	dst->remote_vni = conf->vni; + +	memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip)); + +	if (lowerdev) { +		dst->remote_ifindex = conf->remote_ifindex; + +		dev->gso_max_size = lowerdev->gso_max_size; +		dev->gso_max_segs = lowerdev->gso_max_segs; + +		needed_headroom = lowerdev->hard_header_len; + +		max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : +					   VXLAN_HEADROOM); +	} + +	if (dev->mtu > max_mtu) +		dev->mtu = max_mtu; +  	if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)  		needed_headroom += VXLAN6_HEADROOM;  	else @@ -2958,31 +3085,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,  	dev->needed_headroom = needed_headroom;  	memcpy(&vxlan->cfg, conf, sizeof(*conf)); -	if (!vxlan->cfg.dst_port) { -		if (conf->flags & VXLAN_F_GPE) -			vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */ -		else -			vxlan->cfg.dst_port = default_port; -	} -	vxlan->flags |= conf->flags; +} -	if (!vxlan->cfg.age_interval) -		vxlan->cfg.age_interval = FDB_AGE_DEFAULT; +static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, +			       struct vxlan_config *conf, +			       bool changelink) +{ +	struct vxlan_dev *vxlan = netdev_priv(dev); +	struct net_device *lowerdev; +	int ret; -	if (changelink) -		return 0; +	ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan); +	if (ret) +		return ret; -	list_for_each_entry(tmp, &vn->vxlan_list, next) { -		if (tmp->cfg.vni == conf->vni && -		    (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 || -		     tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 && -		    tmp->cfg.dst_port == vxlan->cfg.dst_port && -		    (tmp->flags & VXLAN_F_RCV_FLAGS) == -		    (vxlan->flags & VXLAN_F_RCV_FLAGS)) { -			pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni)); -			return -EEXIST; -		} -	} +	vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);  	return 0;  } @@ -3046,22 +3163,35 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],  	}  	if (data[IFLA_VXLAN_GROUP]) { +		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) +			return -EOPNOTSUPP; +  		conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]); +		conf->remote_ip.sa.sa_family = AF_INET;  	} else if (data[IFLA_VXLAN_GROUP6]) {  		if (!IS_ENABLED(CONFIG_IPV6))  			return -EPFNOSUPPORT; +		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) +			return -EOPNOTSUPP; +  		conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);  		conf->remote_ip.sa.sa_family = AF_INET6;  	}  	if (data[IFLA_VXLAN_LOCAL]) { +		if (changelink && (conf->saddr.sa.sa_family != AF_INET)) +			return -EOPNOTSUPP; +  		conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);  		conf->saddr.sa.sa_family = AF_INET;  	} else if (data[IFLA_VXLAN_LOCAL6]) {  		if (!IS_ENABLED(CONFIG_IPV6))  			return -EPFNOSUPPORT; +		if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) +			return -EOPNOTSUPP; +  		/* TODO: respect scope id */  		conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);  		conf->saddr.sa.sa_family = AF_INET6; @@ -3081,12 +3211,10 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],  			     IPV6_FLOWLABEL_MASK;  	if (data[IFLA_VXLAN_LEARNING]) { -		if (nla_get_u8(data[IFLA_VXLAN_LEARNING])) { +		if (nla_get_u8(data[IFLA_VXLAN_LEARNING]))  			conf->flags |= VXLAN_F_LEARN; -		} else { +		else  			conf->flags &= ~VXLAN_F_LEARN; -			vxlan->flags &= ~VXLAN_F_LEARN; -		}  	} else if (!changelink) {  		/* default to learn on a new device */  		conf->flags |= VXLAN_F_LEARN; @@ -3219,7 +3347,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],  }  static int vxlan_newlink(struct net *src_net, struct net_device *dev, -			 struct nlattr *tb[], struct nlattr *data[]) +			 struct nlattr *tb[], struct nlattr *data[], +			 struct netlink_ext_ack *extack)  {  	struct vxlan_config conf;  	int err; @@ -3232,7 +3361,8 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,  }  static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], -			    struct nlattr *data[]) +			    struct nlattr *data[], +			    struct netlink_ext_ack *extack)  {  	struct vxlan_dev *vxlan = netdev_priv(dev);  	struct vxlan_rdst *dst = &vxlan->default_dst; @@ -3286,15 +3416,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],  static void vxlan_dellink(struct net_device *dev, struct list_head *head)  {  	struct vxlan_dev *vxlan = netdev_priv(dev); -	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);  	vxlan_flush(vxlan, true); -	spin_lock(&vn->sock_lock); -	if (!hlist_unhashed(&vxlan->hlist)) -		hlist_del_rcu(&vxlan->hlist); -	spin_unlock(&vn->sock_lock); -  	gro_cells_destroy(&vxlan->gro_cells);  	list_del(&vxlan->next);  	unregister_netdevice_queue(dev, head); @@ -3375,43 +3499,44 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)  	    nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||  	    nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||  	    nla_put_u8(skb, IFLA_VXLAN_LEARNING, -			!!(vxlan->flags & VXLAN_F_LEARN)) || +			!!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||  	    nla_put_u8(skb, IFLA_VXLAN_PROXY, -			!!(vxlan->flags & VXLAN_F_PROXY)) || -	    nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) || +			!!(vxlan->cfg.flags & VXLAN_F_PROXY)) || +	    nla_put_u8(skb, IFLA_VXLAN_RSC, +		       !!(vxlan->cfg.flags & VXLAN_F_RSC)) ||  	    nla_put_u8(skb, IFLA_VXLAN_L2MISS, -			!!(vxlan->flags & VXLAN_F_L2MISS)) || +			!!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||  	    nla_put_u8(skb, IFLA_VXLAN_L3MISS, -			!!(vxlan->flags & VXLAN_F_L3MISS)) || +			!!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||  	    nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA, -		       !!(vxlan->flags & VXLAN_F_COLLECT_METADATA)) || +		       !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||  	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||  	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||  	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||  	    nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM, -			!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) || +			!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||  	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, -			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || +			!!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||  	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, -			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) || +			!!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||  	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX, -			!!(vxlan->flags & VXLAN_F_REMCSUM_TX)) || +			!!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||  	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX, -			!!(vxlan->flags & VXLAN_F_REMCSUM_RX))) +			!!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))  		goto nla_put_failure;  	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))  		goto nla_put_failure; -	if (vxlan->flags & VXLAN_F_GBP && +	if (vxlan->cfg.flags & VXLAN_F_GBP &&  	    nla_put_flag(skb, IFLA_VXLAN_GBP))  		goto nla_put_failure; -	if (vxlan->flags & VXLAN_F_GPE && +	if (vxlan->cfg.flags & VXLAN_F_GPE &&  	    nla_put_flag(skb, IFLA_VXLAN_GPE))  		goto nla_put_failure; -	if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL && +	if (vxlan->cfg.flags & VXLAN_F_REMCSUM_NOPARTIAL &&  	    nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))  		goto nla_put_failure; | 
