diff options
 include/linux/skbuff.h               |   3
 include/net/genetlink.h              |   4
 include/uapi/linux/openvswitch.h     |  14
 net/core/skbuff.c                    |  85
 net/netfilter/nfnetlink_queue_core.c |  59
 net/netlink/af_netlink.c             |   4
 net/netlink/genetlink.c              |  21
 net/openvswitch/datapath.c           | 231
 net/openvswitch/datapath.h           |   6
 net/openvswitch/flow.c               |  96
 net/openvswitch/flow.h               |  33
 net/openvswitch/flow_netlink.c       |  66
 net/openvswitch/flow_netlink.h       |   1
 net/openvswitch/flow_table.c         |  60
 net/openvswitch/flow_table.h         |   6
 net/openvswitch/vport.c              |   6
 net/openvswitch/vport.h              |   1
 17 files changed, 483 insertions(+), 213 deletions(-)
| diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 88d4f2ebbec6..956e11a168d8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2445,6 +2445,9 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,  		    struct pipe_inode_info *pipe, unsigned int len,  		    unsigned int flags);  void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); +unsigned int skb_zerocopy_headlen(const struct sk_buff *from); +void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, +		  int len, int hlen);  void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);  int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);  void skb_scrub_packet(struct sk_buff *skb, bool xnet); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b177ed803b7..93695f0e22a5 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -73,6 +73,7 @@ struct genl_family {   * @attrs: netlink attributes   * @_net: network namespace   * @user_ptr: user pointers + * @dst_sk: destination socket   */  struct genl_info {  	u32			snd_seq; @@ -85,6 +86,7 @@ struct genl_info {  	struct net *		_net;  #endif  	void *			user_ptr[2]; +	struct sock *		dst_sk;  };  static inline struct net *genl_info_net(struct genl_info *info) @@ -177,6 +179,8 @@ void genl_notify(struct genl_family *family,  		 struct sk_buff *skb, struct net *net, u32 portid,  		 u32 group, struct nlmsghdr *nlh, gfp_t flags); +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, +				    gfp_t flags);  void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,  		  struct genl_family *family, int flags, u8 cmd); diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d120f9fe0017..970553cbbc8e 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -40,7 +40,15 @@ struct ovs_header {  #define OVS_DATAPATH_FAMILY  "ovs_datapath"  #define OVS_DATAPATH_MCGROUP 
"ovs_datapath" -#define OVS_DATAPATH_VERSION 0x1 + +/* V2: + *   - API users are expected to provide OVS_DP_ATTR_USER_FEATURES + *     when creating the datapath. + */ +#define OVS_DATAPATH_VERSION 2 + +/* First OVS datapath version to support features */ +#define OVS_DP_VER_FEATURES 2  enum ovs_datapath_cmd {  	OVS_DP_CMD_UNSPEC, @@ -75,6 +83,7 @@ enum ovs_datapath_attr {  	OVS_DP_ATTR_UPCALL_PID,		/* Netlink PID to receive upcalls */  	OVS_DP_ATTR_STATS,		/* struct ovs_dp_stats */  	OVS_DP_ATTR_MEGAFLOW_STATS,	/* struct ovs_dp_megaflow_stats */ +	OVS_DP_ATTR_USER_FEATURES,	/* OVS_DP_F_*  */  	__OVS_DP_ATTR_MAX  }; @@ -106,6 +115,9 @@ struct ovs_vport_stats {  	__u64   tx_dropped;		/* no space available in linux  */  }; +/* Allow last Netlink attribute to be unaligned */ +#define OVS_DP_F_UNALIGNED	(1 << 0) +  /* Fixed logical ports. */  #define OVSP_LOCAL      ((__u32)0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eb96c2c22400..1d641e781f85 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2121,6 +2121,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,  }  EXPORT_SYMBOL(skb_copy_and_csum_bits); + /** + *	skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() + *	@from: source buffer + * + *	Calculates the amount of linear headroom needed in the 'to' skb passed + *	into skb_zerocopy(). 
+ */ +unsigned int +skb_zerocopy_headlen(const struct sk_buff *from) +{ +	unsigned int hlen = 0; + +	if (!from->head_frag || +	    skb_headlen(from) < L1_CACHE_BYTES || +	    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) +		hlen = skb_headlen(from); + +	if (skb_has_frag_list(from)) +		hlen = from->len; + +	return hlen; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); + +/** + *	skb_zerocopy - Zero copy skb to skb + *	@to: destination buffer + *	@source: source buffer + *	@len: number of bytes to copy from source buffer + *	@hlen: size of linear headroom in destination buffer + * + *	Copies up to `len` bytes from `from` to `to` by creating references + *	to the frags in the source buffer. + * + *	The `hlen` as calculated by skb_zerocopy_headlen() specifies the + *	headroom in the `to` buffer. + */ +void +skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ +	int i, j = 0; +	int plen = 0; /* length of skb->head fragment */ +	struct page *page; +	unsigned int offset; + +	BUG_ON(!from->head_frag && !hlen); + +	/* dont bother with small payloads */ +	if (len <= skb_tailroom(to)) { +		skb_copy_bits(from, 0, skb_put(to, len), len); +		return; +	} + +	if (hlen) { +		skb_copy_bits(from, 0, skb_put(to, hlen), hlen); +		len -= hlen; +	} else { +		plen = min_t(int, skb_headlen(from), len); +		if (plen) { +			page = virt_to_head_page(from->head); +			offset = from->data - (unsigned char *)page_address(page); +			__skb_fill_page_desc(to, 0, page, offset, plen); +			get_page(page); +			j = 1; +			len -= plen; +		} +	} + +	to->truesize += len + plen; +	to->len += len + plen; +	to->data_len += len + plen; + +	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { +		if (!len) +			break; +		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; +		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); +		len -= skb_shinfo(to)->frags[j].size; +		skb_frag_ref(to, j); +		j++; +	} +	skb_shinfo(to)->nr_frags = j; +} 
+EXPORT_SYMBOL_GPL(skb_zerocopy); +  void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)  {  	__wsum csum; diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index b5e1f82890df..f072fe803510 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -236,51 +236,6 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)  	spin_unlock_bh(&queue->lock);  } -static void -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) -{ -	int i, j = 0; -	int plen = 0; /* length of skb->head fragment */ -	struct page *page; -	unsigned int offset; - -	/* dont bother with small payloads */ -	if (len <= skb_tailroom(to)) { -		skb_copy_bits(from, 0, skb_put(to, len), len); -		return; -	} - -	if (hlen) { -		skb_copy_bits(from, 0, skb_put(to, hlen), hlen); -		len -= hlen; -	} else { -		plen = min_t(int, skb_headlen(from), len); -		if (plen) { -			page = virt_to_head_page(from->head); -			offset = from->data - (unsigned char *)page_address(page); -			__skb_fill_page_desc(to, 0, page, offset, plen); -			get_page(page); -			j = 1; -			len -= plen; -		} -	} - -	to->truesize += len + plen; -	to->len += len + plen; -	to->data_len += len + plen; - -	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { -		if (!len) -			break; -		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; -		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); -		len -= skb_shinfo(to)->frags[j].size; -		skb_frag_ref(to, j); -		j++; -	} -	skb_shinfo(to)->nr_frags = j; -} -  static int  nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,  		      bool csum_verify) @@ -330,7 +285,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,  {  	size_t size;  	size_t data_len = 0, cap_len = 0; -	int hlen = 0; +	unsigned int hlen = 0;  	struct sk_buff *skb;  	struct nlattr *nla;  	struct nfqnl_msg_packet_hdr *pmsg; @@ -382,14 
+337,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,  		if (data_len > entskb->len)  			data_len = entskb->len; -		if (!entskb->head_frag || -		    skb_headlen(entskb) < L1_CACHE_BYTES || -		    skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) -			hlen = skb_headlen(entskb); - -		if (skb_has_frag_list(entskb)) -			hlen = entskb->len; -		hlen = min_t(int, data_len, hlen); +		hlen = skb_zerocopy_headlen(entskb); +		hlen = min_t(unsigned int, hlen, data_len);  		size += sizeof(struct nlattr) + hlen;  		cap_len = entskb->len;  		break; @@ -539,7 +488,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,  		nla->nla_type = NFQA_PAYLOAD;  		nla->nla_len = nla_attr_size(data_len); -		nfqnl_zcopy(skb, entskb, data_len, hlen); +		skb_zerocopy(skb, entskb, data_len, hlen);  	}  	nlh->nlmsg_len = skb->len; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b077b90c1254..34a656d90175 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1773,6 +1773,9 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,  	if (ring->pg_vec == NULL)  		goto out_put; +	if (ring->frame_size - NL_MMAP_HDRLEN < size) +		goto out_put; +  	skb = alloc_skb_head(gfp_mask);  	if (skb == NULL)  		goto err1; @@ -1782,6 +1785,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,  	if (ring->pg_vec == NULL)  		goto out_free; +	/* check again under lock */  	maxlen = ring->frame_size - NL_MMAP_HDRLEN;  	if (maxlen < size)  		goto out_free; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 713671ae45af..b1dcdb932a86 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -461,6 +461,26 @@ int genl_unregister_family(struct genl_family *family)  EXPORT_SYMBOL(genl_unregister_family);  /** + * genlmsg_new_unicast - Allocate generic netlink message for unicast + * @payload: size of the message payload + * @info: information on destination + * 
@flags: the type of memory to allocate + * + * Allocates a new sk_buff large enough to cover the specified payload + * plus required Netlink headers. Will check receiving socket for + * memory mapped i/o capability and use it if enabled. Will fall back + * to non-mapped skb if message size exceeds the frame size of the ring. + */ +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, +				    gfp_t flags) +{ +	size_t len = nlmsg_total_size(genlmsg_total_size(payload)); + +	return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); +} +EXPORT_SYMBOL_GPL(genlmsg_new_unicast); + +/**   * genlmsg_put - Add generic netlink header to netlink message   * @skb: socket buffer holding the message   * @portid: netlink portid the message is addressed to @@ -600,6 +620,7 @@ static int genl_family_rcv_msg(struct genl_family *family,  	info.genlhdr = nlmsg_data(nlh);  	info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;  	info.attrs = attrbuf; +	info.dst_sk = skb->sk;  	genl_info_net_set(&info, net);  	memset(&info.user_ptr, 0, sizeof(info.user_ptr)); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6f5e1dd3be2d..df4692826ead 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -108,10 +108,9 @@ int lockdep_ovsl_is_held(void)  #endif  static struct vport *new_vport(const struct vport_parms *); -static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *,  			     const struct dp_upcall_info *); -static int queue_userspace_packet(struct net *, int dp_ifindex, -				  struct sk_buff *, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,  				  const struct dp_upcall_info *);  /* Must be called with rcu_read_lock or ovs_mutex. */ @@ -133,7 +132,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)  }  /* Must be called with rcu_read_lock or ovs_mutex. 
*/ -const char *ovs_dp_name(const struct datapath *dp) +static const char *ovs_dp_name(const struct datapath *dp)  {  	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);  	return vport->ops->get_name(vport); @@ -234,7 +233,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)  	}  	/* Look up flow. */ -	flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); +	flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);  	if (unlikely(!flow)) {  		struct dp_upcall_info upcall; @@ -251,9 +250,9 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)  	OVS_CB(skb)->flow = flow;  	OVS_CB(skb)->pkt_key = &key; -	stats_counter = &stats->n_hit; -	ovs_flow_used(OVS_CB(skb)->flow, skb); +	ovs_flow_stats_update(OVS_CB(skb)->flow, skb);  	ovs_execute_actions(dp, skb); +	stats_counter = &stats->n_hit;  out:  	/* Update datapath statistics. */ @@ -277,7 +276,6 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,  		  const struct dp_upcall_info *upcall_info)  {  	struct dp_stats_percpu *stats; -	int dp_ifindex;  	int err;  	if (upcall_info->portid == 0) { @@ -285,16 +283,10 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,  		goto err;  	} -	dp_ifindex = get_dpifindex(dp); -	if (!dp_ifindex) { -		err = -ENODEV; -		goto err; -	} -  	if (!skb_is_gso(skb)) -		err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); +		err = queue_userspace_packet(dp, skb, upcall_info);  	else -		err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); +		err = queue_gso_packets(dp, skb, upcall_info);  	if (err)  		goto err; @@ -310,8 +302,7 @@ err:  	return err;  } -static int queue_gso_packets(struct net *net, int dp_ifindex, -			     struct sk_buff *skb, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,  			     const struct dp_upcall_info *upcall_info)  {  	unsigned short gso_type = skb_shinfo(skb)->gso_type; @@ -320,14 +311,14 @@ static int 
queue_gso_packets(struct net *net, int dp_ifindex,  	struct sk_buff *segs, *nskb;  	int err; -	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); +	segs = __skb_gso_segment(skb, NETIF_F_SG, false);  	if (IS_ERR(segs))  		return PTR_ERR(segs);  	/* Queue all of the segments. */  	skb = segs;  	do { -		err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); +		err = queue_userspace_packet(dp, skb, upcall_info);  		if (err)  			break; @@ -380,11 +371,11 @@ static size_t key_attr_size(void)  		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */  } -static size_t upcall_msg_size(const struct sk_buff *skb, -			      const struct nlattr *userdata) +static size_t upcall_msg_size(const struct nlattr *userdata, +			      unsigned int hdrlen)  {  	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) -		+ nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ +		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */  		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */  	/* OVS_PACKET_ATTR_USERDATA */ @@ -394,15 +385,24 @@ static size_t upcall_msg_size(const struct sk_buff *skb,  	return size;  } -static int queue_userspace_packet(struct net *net, int dp_ifindex, -				  struct sk_buff *skb, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,  				  const struct dp_upcall_info *upcall_info)  {  	struct ovs_header *upcall;  	struct sk_buff *nskb = NULL;  	struct sk_buff *user_skb; /* to be queued to userspace */  	struct nlattr *nla; -	int err; +	struct genl_info info = { +		.dst_sk = ovs_dp_get_net(dp)->genl_sock, +		.snd_portid = upcall_info->portid, +	}; +	size_t len; +	unsigned int hlen; +	int err, dp_ifindex; + +	dp_ifindex = get_dpifindex(dp); +	if (!dp_ifindex) +		return -ENODEV;  	if (vlan_tx_tag_present(skb)) {  		nskb = skb_clone(skb, GFP_ATOMIC); @@ -422,7 +422,22 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,  		goto out;  	} -	user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), 
GFP_ATOMIC); +	/* Complete checksum if needed */ +	if (skb->ip_summed == CHECKSUM_PARTIAL && +	    (err = skb_checksum_help(skb))) +		goto out; + +	/* Older versions of OVS user space enforce alignment of the last +	 * Netlink attribute to NLA_ALIGNTO which would require extensive +	 * padding logic. Only perform zerocopy if padding is not required. +	 */ +	if (dp->user_features & OVS_DP_F_UNALIGNED) +		hlen = skb_zerocopy_headlen(skb); +	else +		hlen = skb->len; + +	len = upcall_msg_size(upcall_info->userdata, hlen); +	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);  	if (!user_skb) {  		err = -ENOMEM;  		goto out; @@ -441,26 +456,24 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,  			  nla_len(upcall_info->userdata),  			  nla_data(upcall_info->userdata)); -	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); +	/* Only reserve room for attribute header, packet data is added +	 * in skb_zerocopy() */ +	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { +		err = -ENOBUFS; +		goto out; +	} +	nla->nla_len = nla_attr_size(skb->len); -	skb_copy_and_csum_dev(skb, nla_data(nla)); +	skb_zerocopy(user_skb, skb, skb->len, hlen); -	genlmsg_end(user_skb, upcall); -	err = genlmsg_unicast(net, user_skb, upcall_info->portid); +	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; +	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);  out:  	kfree_skb(nskb);  	return err;  } -static void clear_stats(struct sw_flow *flow) -{ -	flow->used = 0; -	flow->tcp_flags = 0; -	flow->packet_count = 0; -	flow->byte_count = 0; -} -  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)  {  	struct ovs_header *ovs_header = info->userhdr; @@ -499,7 +512,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)  		packet->protocol = htons(ETH_P_802_2);  	/* Build an sw_flow for sending this packet. 
*/ -	flow = ovs_flow_alloc(); +	flow = ovs_flow_alloc(false);  	err = PTR_ERR(flow);  	if (IS_ERR(flow))  		goto err_kfree_skb; @@ -635,10 +648,10 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,  	const int skb_orig_len = skb->len;  	struct nlattr *start;  	struct ovs_flow_stats stats; +	__be16 tcp_flags; +	unsigned long used;  	struct ovs_header *ovs_header;  	struct nlattr *nla; -	unsigned long used; -	u8 tcp_flags;  	int err;  	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); @@ -667,24 +680,17 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,  	nla_nest_end(skb, nla); -	spin_lock_bh(&flow->lock); -	used = flow->used; -	stats.n_packets = flow->packet_count; -	stats.n_bytes = flow->byte_count; -	tcp_flags = (u8)ntohs(flow->tcp_flags); -	spin_unlock_bh(&flow->lock); - +	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);  	if (used &&  	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))  		goto nla_put_failure;  	if (stats.n_packets && -	    nla_put(skb, OVS_FLOW_ATTR_STATS, -		    sizeof(struct ovs_flow_stats), &stats)) +	    nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))  		goto nla_put_failure; -	if (tcp_flags && -	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) +	if ((u8)ntohs(tcp_flags) && +	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))  		goto nla_put_failure;  	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if @@ -701,8 +707,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,  	if (start) {  		const struct sw_flow_actions *sf_acts; -		sf_acts = rcu_dereference_check(flow->sf_acts, -						lockdep_ovsl_is_held()); +		sf_acts = rcu_dereference_ovsl(flow->sf_acts);  		err = ovs_nla_put_actions(sf_acts->actions,  					  sf_acts->actions_len, skb); @@ -726,39 +731,34 @@ error:  	return err;  } -static struct sk_buff *ovs_flow_cmd_alloc_info(struct 
sw_flow *flow) +static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, +					       struct genl_info *info)  { -	const struct sw_flow_actions *sf_acts; +	size_t len; -	sf_acts = ovsl_dereference(flow->sf_acts); +	len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); -	return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); +	return genlmsg_new_unicast(len, info, GFP_KERNEL);  }  static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,  					       struct datapath *dp, -					       u32 portid, u32 seq, u8 cmd) +					       struct genl_info *info, +					       u8 cmd)  {  	struct sk_buff *skb;  	int retval; -	skb = ovs_flow_cmd_alloc_info(flow); +	skb = ovs_flow_cmd_alloc_info(flow, info);  	if (!skb)  		return ERR_PTR(-ENOMEM); -	retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); +	retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, +					info->snd_seq, 0, cmd);  	BUG_ON(retval < 0);  	return skb;  } -static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, -					      const struct sw_flow_key *key) -{ -	u32 __always_unused n_mask_hit; - -	return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); -} -  static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  {  	struct nlattr **a = info->attrs; @@ -770,6 +770,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  	struct datapath *dp;  	struct sw_flow_actions *acts = NULL;  	struct sw_flow_match match; +	bool exact_5tuple;  	int error;  	/* Extract key. 
*/ @@ -778,7 +779,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  		goto error;  	ovs_match_init(&match, &key, &mask); -	error = ovs_nla_get_match(&match, +	error = ovs_nla_get_match(&match, &exact_5tuple,  				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);  	if (error)  		goto error; @@ -809,7 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  		goto err_unlock_ovs;  	/* Check if this is a duplicate flow */ -	flow = __ovs_flow_tbl_lookup(&dp->table, &key); +	flow = ovs_flow_tbl_lookup(&dp->table, &key);  	if (!flow) {  		/* Bail out if we're not allowed to create a new flow. */  		error = -ENOENT; @@ -817,12 +818,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  			goto err_unlock_ovs;  		/* Allocate flow. */ -		flow = ovs_flow_alloc(); +		flow = ovs_flow_alloc(!exact_5tuple);  		if (IS_ERR(flow)) {  			error = PTR_ERR(flow);  			goto err_unlock_ovs;  		} -		clear_stats(flow);  		flow->key = masked_key;  		flow->unmasked_key = key; @@ -835,8 +835,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  			goto err_flow_free;  		} -		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, -						info->snd_seq, OVS_FLOW_CMD_NEW); +		reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);  	} else {  		/* We found a matching flow. */  		struct sw_flow_actions *old_acts; @@ -864,15 +863,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  		rcu_assign_pointer(flow->sf_acts, acts);  		ovs_nla_free_flow_actions(old_acts); -		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, -					       info->snd_seq, OVS_FLOW_CMD_NEW); +		reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);  		/* Clear stats. 
*/ -		if (a[OVS_FLOW_ATTR_CLEAR]) { -			spin_lock_bh(&flow->lock); -			clear_stats(flow); -			spin_unlock_bh(&flow->lock); -		} +		if (a[OVS_FLOW_ATTR_CLEAR]) +			ovs_flow_stats_clear(flow);  	}  	ovs_unlock(); @@ -910,7 +905,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)  	}  	ovs_match_init(&match, &key, NULL); -	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); +	err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);  	if (err)  		return err; @@ -921,14 +916,13 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)  		goto unlock;  	} -	flow = __ovs_flow_tbl_lookup(&dp->table, &key); +	flow = ovs_flow_tbl_lookup(&dp->table, &key);  	if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {  		err = -ENOENT;  		goto unlock;  	} -	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, -					info->snd_seq, OVS_FLOW_CMD_NEW); +	reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);  	if (IS_ERR(reply)) {  		err = PTR_ERR(reply);  		goto unlock; @@ -965,17 +959,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)  	}  	ovs_match_init(&match, &key, NULL); -	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); +	err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);  	if (err)  		goto unlock; -	flow = __ovs_flow_tbl_lookup(&dp->table, &key); +	flow = ovs_flow_tbl_lookup(&dp->table, &key);  	if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {  		err = -ENOENT;  		goto unlock;  	} -	reply = ovs_flow_cmd_alloc_info(flow); +	reply = ovs_flow_cmd_alloc_info(flow, info);  	if (!reply) {  		err = -ENOMEM;  		goto unlock; @@ -1061,6 +1055,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {  static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {  	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },  	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, +	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, 
 };  static struct genl_family dp_datapath_genl_family = { @@ -1119,6 +1114,9 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,  			&dp_megaflow_stats))  		goto nla_put_failure; +	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) +		goto nla_put_failure; +  	return genlmsg_end(skb, ovs_header);  nla_put_failure: @@ -1127,17 +1125,17 @@ error:  	return -EMSGSIZE;  } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, -					     u32 seq, u8 cmd) +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, +					     struct genl_info *info, u8 cmd)  {  	struct sk_buff *skb;  	int retval; -	skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); +	skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);  	if (!skb)  		return ERR_PTR(-ENOMEM); -	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); +	retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd);  	if (retval < 0) {  		kfree_skb(skb);  		return ERR_PTR(retval); @@ -1165,6 +1163,24 @@ static struct datapath *lookup_datapath(struct net *net,  	return dp ? 
dp : ERR_PTR(-ENODEV);  } +static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) +{ +	struct datapath *dp; + +	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); +	if (!dp) +		return; + +	WARN(dp->user_features, "Dropping previously announced user features\n"); +	dp->user_features = 0; +} + +static void ovs_dp_change(struct datapath *dp, struct nlattr **a) +{ +	if (a[OVS_DP_ATTR_USER_FEATURES]) +		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); +} +  static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)  {  	struct nlattr **a = info->attrs; @@ -1223,17 +1239,27 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)  	parms.port_no = OVSP_LOCAL;  	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); +	ovs_dp_change(dp, a); +  	vport = new_vport(&parms);  	if (IS_ERR(vport)) {  		err = PTR_ERR(vport);  		if (err == -EBUSY)  			err = -EEXIST; +		if (err == -EEXIST) { +			/* An outdated user space instance that does not understand +			 * the concept of user_features has attempted to create a new +			 * datapath and is likely to reuse it. Drop all user features. 
+			 */ +			if (info->genlhdr->version < OVS_DP_VER_FEATURES) +				ovs_dp_reset_user_features(skb, info); +		} +  		goto err_destroy_ports_array;  	} -	reply = ovs_dp_cmd_build_info(dp, info->snd_portid, -				      info->snd_seq, OVS_DP_CMD_NEW); +	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);  	err = PTR_ERR(reply);  	if (IS_ERR(reply))  		goto err_destroy_local_port; @@ -1299,8 +1325,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)  	if (IS_ERR(dp))  		goto unlock; -	reply = ovs_dp_cmd_build_info(dp, info->snd_portid, -				      info->snd_seq, OVS_DP_CMD_DEL); +	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL);  	err = PTR_ERR(reply);  	if (IS_ERR(reply))  		goto unlock; @@ -1328,8 +1353,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)  	if (IS_ERR(dp))  		goto unlock; -	reply = ovs_dp_cmd_build_info(dp, info->snd_portid, -				      info->snd_seq, OVS_DP_CMD_NEW); +	ovs_dp_change(dp, info->attrs); + +	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);  	if (IS_ERR(reply)) {  		err = PTR_ERR(reply);  		genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, @@ -1360,8 +1386,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)  		goto unlock;  	} -	reply = ovs_dp_cmd_build_info(dp, info->snd_portid, -				      info->snd_seq, OVS_DP_CMD_NEW); +	reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW);  	if (IS_ERR(reply)) {  		err = PTR_ERR(reply);  		goto unlock; @@ -1441,7 +1466,7 @@ struct genl_family dp_vport_genl_family = {  	.parallel_ops = true,  }; -struct genl_multicast_group ovs_dp_vport_multicast_group = { +static struct genl_multicast_group ovs_dp_vport_multicast_group = {  	.name = OVS_VPORT_MCGROUP  }; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4067ea41be28..6be9fbb5e9cb 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,6 +88,8 @@ struct datapath {  	/* Network namespace ref. 
*/  	struct net *net;  #endif + +	u32 user_features;  };  /** @@ -145,6 +147,8 @@ int lockdep_ovsl_is_held(void);  #define ASSERT_OVSL()		WARN_ON(unlikely(!lockdep_ovsl_is_held()))  #define ovsl_dereference(p)					\  	rcu_dereference_protected(p, lockdep_ovsl_is_held()) +#define rcu_dereference_ovsl(p)					\ +	rcu_dereference_check(p, lockdep_ovsl_is_held())  static inline struct net *ovs_dp_get_net(struct datapath *dp)  { @@ -178,14 +182,12 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n  extern struct notifier_block ovs_dp_device_notifier;  extern struct genl_family dp_vport_genl_family; -extern struct genl_multicast_group ovs_dp_vport_multicast_group;  void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);  void ovs_dp_detach_port(struct vport *);  int ovs_dp_upcall(struct datapath *, struct sk_buff *,  		  const struct dp_upcall_info *); -const char *ovs_dp_name(const struct datapath *dp);  struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,  					 u8 cmd); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b409f5279601..16f4b46161d4 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -35,6 +35,7 @@  #include <linux/ip.h>  #include <linux/ipv6.h>  #include <linux/sctp.h> +#include <linux/smp.h>  #include <linux/tcp.h>  #include <linux/udp.h>  #include <linux/icmp.h> @@ -60,10 +61,16 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)  #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)  { +	struct flow_stats *stats;  	__be16 tcp_flags = 0; +	if (!flow->stats.is_percpu) +		stats = flow->stats.stat; +	else +		stats = this_cpu_ptr(flow->stats.cpu_stats); +  	if ((flow->key.eth.type == htons(ETH_P_IP) ||  	     flow->key.eth.type == htons(ETH_P_IPV6)) &&  	    flow->key.ip.proto == IPPROTO_TCP && @@ 
-71,12 +78,87 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)  		tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));  	} -	spin_lock(&flow->lock); -	flow->used = jiffies; -	flow->packet_count++; -	flow->byte_count += skb->len; -	flow->tcp_flags |= tcp_flags; -	spin_unlock(&flow->lock); +	spin_lock(&stats->lock); +	stats->used = jiffies; +	stats->packet_count++; +	stats->byte_count += skb->len; +	stats->tcp_flags |= tcp_flags; +	spin_unlock(&stats->lock); +} + +static void stats_read(struct flow_stats *stats, +		       struct ovs_flow_stats *ovs_stats, +		       unsigned long *used, __be16 *tcp_flags) +{ +	spin_lock(&stats->lock); +	if (time_after(stats->used, *used)) +		*used = stats->used; +	*tcp_flags |= stats->tcp_flags; +	ovs_stats->n_packets += stats->packet_count; +	ovs_stats->n_bytes += stats->byte_count; +	spin_unlock(&stats->lock); +} + +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, +			unsigned long *used, __be16 *tcp_flags) +{ +	int cpu, cur_cpu; + +	*used = 0; +	*tcp_flags = 0; +	memset(ovs_stats, 0, sizeof(*ovs_stats)); + +	if (!flow->stats.is_percpu) { +		stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); +	} else { +		cur_cpu = get_cpu(); +		for_each_possible_cpu(cpu) { +			struct flow_stats *stats; + +			if (cpu == cur_cpu) +				local_bh_disable(); + +			stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); +			stats_read(stats, ovs_stats, used, tcp_flags); + +			if (cpu == cur_cpu) +				local_bh_enable(); +		} +		put_cpu(); +	} +} + +static void stats_reset(struct flow_stats *stats) +{ +	spin_lock(&stats->lock); +	stats->used = 0; +	stats->packet_count = 0; +	stats->byte_count = 0; +	stats->tcp_flags = 0; +	spin_unlock(&stats->lock); +} + +void ovs_flow_stats_clear(struct sw_flow *flow) +{ +	int cpu, cur_cpu; + +	if (!flow->stats.is_percpu) { +		stats_reset(flow->stats.stat); +	} else { +		cur_cpu = get_cpu(); + +		for_each_possible_cpu(cpu) { + +			if (cpu == cur_cpu) +				local_bh_disable(); + +			
stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); + +			if (cpu == cur_cpu) +				local_bh_enable(); +		} +		put_cpu(); +	}  }  static int check_header(struct sk_buff *skb, int len) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 1510f51dbf74..2d770e28a3a3 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -19,6 +19,7 @@  #ifndef FLOW_H  #define FLOW_H 1 +#include <linux/cache.h>  #include <linux/kernel.h>  #include <linux/netlink.h>  #include <linux/openvswitch.h> @@ -122,8 +123,8 @@ struct sw_flow_key {  } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */  struct sw_flow_key_range { -	size_t start; -	size_t end; +	unsigned short int start; +	unsigned short int end;  };  struct sw_flow_mask { @@ -146,6 +147,22 @@ struct sw_flow_actions {  	struct nlattr actions[];  }; +struct flow_stats { +	u64 packet_count;		/* Number of packets matched. */ +	u64 byte_count;			/* Number of bytes matched. */ +	unsigned long used;		/* Last used time (in jiffies). */ +	spinlock_t lock;		/* Lock for atomic stats update. */ +	__be16 tcp_flags;		/* Union of seen TCP flags. */ +}; + +struct sw_flow_stats { +	bool is_percpu; +	union { +		struct flow_stats *stat; +		struct flow_stats __percpu *cpu_stats; +	}; +}; +  struct sw_flow {  	struct rcu_head rcu;  	struct hlist_node hash_node[2]; @@ -155,12 +172,7 @@ struct sw_flow {  	struct sw_flow_key unmasked_key;  	struct sw_flow_mask *mask;  	struct sw_flow_actions __rcu *sf_acts; - -	spinlock_t lock;	/* Lock for values below. */ -	unsigned long used;	/* Last used time (in jiffies). */ -	u64 packet_count;	/* Number of packets matched. */ -	u64 byte_count;		/* Number of bytes matched. */ -	__be16 tcp_flags;	/* Union of seen TCP flags. 
*/ +	struct sw_flow_stats stats;  };  struct arp_eth_header { @@ -177,7 +189,10 @@ struct arp_eth_header {  	unsigned char       ar_tip[4];		/* target IP address        */  } __packed; -void ovs_flow_used(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, +			unsigned long *used, __be16 *tcp_flags); +void ovs_flow_stats_clear(struct sw_flow *flow);  u64 ovs_flow_used_time(unsigned long flow_jiffies);  int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 2bc1bc1aca3b..4d000acaed0d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -266,6 +266,20 @@ static bool is_all_zero(const u8 *fp, size_t size)  	return true;  } +static bool is_all_set(const u8 *fp, size_t size) +{ +	int i; + +	if (!fp) +		return false; + +	for (i = 0; i < size; i++) +		if (fp[i] != 0xff) +			return false; + +	return true; +} +  static int __parse_flow_nlattrs(const struct nlattr *attr,  				const struct nlattr *a[],  				u64 *attrsp, bool nz) @@ -487,8 +501,9 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,  	return 0;  } -static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs, -				const struct nlattr **a, bool is_mask) +static int ovs_key_from_nlattrs(struct sw_flow_match *match,  bool *exact_5tuple, +				u64 attrs, const struct nlattr **a, +				bool is_mask)  {  	int err;  	u64 orig_attrs = attrs; @@ -545,6 +560,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,  		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);  	} +	if (is_mask && exact_5tuple) { +		if (match->mask->key.eth.type != htons(0xffff)) +			*exact_5tuple = false; +	} +  	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {  		const struct ovs_key_ipv4 *ipv4_key; @@ -567,6 +587,13 @@ 
static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,  		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,  				ipv4_key->ipv4_dst, is_mask);  		attrs &= ~(1 << OVS_KEY_ATTR_IPV4); + +		if (is_mask && exact_5tuple && *exact_5tuple) { +			if (ipv4_key->ipv4_proto != 0xff || +			    ipv4_key->ipv4_src != htonl(0xffffffff) || +			    ipv4_key->ipv4_dst != htonl(0xffffffff)) +				*exact_5tuple = false; +		}  	}  	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { @@ -598,6 +625,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,  				is_mask);  		attrs &= ~(1 << OVS_KEY_ATTR_IPV6); + +		if (is_mask && exact_5tuple && *exact_5tuple) { +			if (ipv6_key->ipv6_proto != 0xff || +			    !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || +			    !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) +				*exact_5tuple = false; +		}  	}  	if (attrs & (1 << OVS_KEY_ATTR_ARP)) { @@ -640,6 +674,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,  					tcp_key->tcp_dst, is_mask);  		}  		attrs &= ~(1 << OVS_KEY_ATTR_TCP); + +		if (is_mask && exact_5tuple && *exact_5tuple && +		    (tcp_key->tcp_src != htons(0xffff) || +		     tcp_key->tcp_dst != htons(0xffff))) +			*exact_5tuple = false;  	}  	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { @@ -671,6 +710,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,  					udp_key->udp_dst, is_mask);  		}  		attrs &= ~(1 << OVS_KEY_ATTR_UDP); + +		if (is_mask && exact_5tuple && *exact_5tuple && +		    (udp_key->udp_src != htons(0xffff) || +		     udp_key->udp_dst != htons(0xffff))) +			*exact_5tuple = false;  	}  	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { @@ -756,6 +800,7 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask,   * attribute specifies the mask field of the wildcarded flow.   
*/  int ovs_nla_get_match(struct sw_flow_match *match, +		      bool *exact_5tuple,  		      const struct nlattr *key,  		      const struct nlattr *mask)  { @@ -803,10 +848,13 @@ int ovs_nla_get_match(struct sw_flow_match *match,  		}  	} -	err = ovs_key_from_nlattrs(match, key_attrs, a, false); +	err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false);  	if (err)  		return err; +	if (exact_5tuple) +		*exact_5tuple = true; +  	if (mask) {  		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);  		if (err) @@ -844,7 +892,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,  			}  		} -		err = ovs_key_from_nlattrs(match, mask_attrs, a, true); +		err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true);  		if (err)  			return err;  	} else { @@ -1128,19 +1176,11 @@ struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)  	return sfa;  } -/* RCU callback used by ovs_nla_free_flow_actions. */ -static void rcu_free_acts_callback(struct rcu_head *rcu) -{ -	struct sw_flow_actions *sf_acts = container_of(rcu, -			struct sw_flow_actions, rcu); -	kfree(sf_acts); -} -  /* Schedules 'sf_acts' to be freed after the next RCU grace period.   * The caller must hold rcu_read_lock for this to be sensible. 
*/  void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)  { -	call_rcu(&sf_acts->rcu, rcu_free_acts_callback); +	kfree_rcu(sf_acts, rcu);  }  static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 440151045d39..b31fbe28bc7a 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,6 +45,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *,  int ovs_nla_get_flow_metadata(struct sw_flow *flow,  			      const struct nlattr *attr);  int ovs_nla_get_match(struct sw_flow_match *match, +		      bool *exact_5tuple,  		      const struct nlattr *,  		      const struct nlattr *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 0e720c316070..b430d42b2d0f 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -44,8 +44,6 @@  #include <net/ipv6.h>  #include <net/ndisc.h> -#include "datapath.h" -  #define TBL_MIN_BUCKETS		1024  #define REHASH_INTERVAL		(10 * 60 * HZ) @@ -72,19 +70,42 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,  		*d++ = *s++ & *m++;  } -struct sw_flow *ovs_flow_alloc(void) +struct sw_flow *ovs_flow_alloc(bool percpu_stats)  {  	struct sw_flow *flow; +	int cpu;  	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);  	if (!flow)  		return ERR_PTR(-ENOMEM); -	spin_lock_init(&flow->lock);  	flow->sf_acts = NULL;  	flow->mask = NULL; +	flow->stats.is_percpu = percpu_stats; + +	if (!percpu_stats) { +		flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); +		if (!flow->stats.stat) +			goto err; + +		spin_lock_init(&flow->stats.stat->lock); +	} else { +		flow->stats.cpu_stats = alloc_percpu(struct flow_stats); +		if (!flow->stats.cpu_stats) +			goto err; + +		for_each_possible_cpu(cpu) { +			struct flow_stats *cpu_stats; + +			cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); +			spin_lock_init(&cpu_stats->lock); +		} +	}  	
return flow; +err: +	kfree(flow); +	return ERR_PTR(-ENOMEM);  }  int ovs_flow_tbl_count(struct flow_table *table) @@ -118,6 +139,10 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)  static void flow_free(struct sw_flow *flow)  {  	kfree((struct sf_flow_acts __force *)flow->sf_acts); +	if (flow->stats.is_percpu) +		free_percpu(flow->stats.cpu_stats); +	else +		kfree(flow->stats.stat);  	kmem_cache_free(flow_cache, flow);  } @@ -128,13 +153,6 @@ static void rcu_free_flow_callback(struct rcu_head *rcu)  	flow_free(flow);  } -static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) -{ -	struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); - -	kfree(mask); -} -  static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)  {  	if (!mask) @@ -146,7 +164,7 @@ static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)  	if (!mask->ref_count) {  		list_del_rcu(&mask->list);  		if (deferred) -			call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); +			kfree_rcu(mask, rcu);  		else  			kfree(mask);  	} @@ -429,11 +447,11 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,  	return NULL;  } -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,  				    const struct sw_flow_key *key,  				    u32 *n_mask_hit)  { -	struct table_instance *ti = rcu_dereference(tbl->ti); +	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);  	struct sw_flow_mask *mask;  	struct sw_flow *flow; @@ -447,6 +465,14 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,  	return NULL;  } +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, +				    const struct sw_flow_key *key) +{ +	u32 __always_unused n_mask_hit; + +	return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); +} +  int ovs_flow_tbl_num_masks(const struct flow_table *table)  {  	struct sw_flow_mask *mask; @@ -514,11 +540,7 @@ static struct sw_flow_mask 
*flow_mask_find(const struct flow_table *tbl,  	return NULL;  } -/** - * add a new mask into the mask list. - * The caller needs to make sure that 'mask' is not the same - * as any masks that are already on the list. - */ +/* Add 'mask' into the mask list, if it is not already there. */  static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,  			    struct sw_flow_mask *new)  { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index fbe45d5ad07d..1996e34c0fd8 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -55,7 +55,7 @@ struct flow_table {  int ovs_flow_init(void);  void ovs_flow_exit(void); -struct sw_flow *ovs_flow_alloc(void); +struct sw_flow *ovs_flow_alloc(bool percpu_stats);  void ovs_flow_free(struct sw_flow *, bool deferred);  int ovs_flow_tbl_init(struct flow_table *); @@ -69,9 +69,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);  int  ovs_flow_tbl_num_masks(const struct flow_table *table);  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,  				       u32 *bucket, u32 *idx); -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,  				    const struct sw_flow_key *,  				    u32 *n_mask_hit); +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, +				    const struct sw_flow_key *);  bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,  			       struct sw_flow_match *match); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index f5275dd29cd9..208dd9a26dd1 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -33,6 +33,9 @@  #include "vport.h"  #include "vport-internal_dev.h" +static void ovs_vport_record_error(struct vport *, +				   enum vport_err_type err_type); +  /* List of statically compiled vport implementations.  Don't forget to also   * add yours to the list at the bottom of vport.h. 
*/  static const struct vport_ops *vport_ops_list[] = { @@ -396,7 +399,8 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)   * If using the vport generic stats layer indicate that an error of the given   * type has occurred.   */ -void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) +static void ovs_vport_record_error(struct vport *vport, +				   enum vport_err_type err_type)  {  	spin_lock(&vport->stats_lock); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index bc97ef7fa2af..d7e50a17396c 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -192,7 +192,6 @@ static inline struct vport *vport_from_priv(const void *priv)  void ovs_vport_receive(struct vport *, struct sk_buff *,  		       struct ovs_key_ipv4_tunnel *); -void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);  /* List of statically compiled vport implementations.  Don't forget to also   * add yours to the list at the top of vport.c. */ | 
