From cad2929dc4321b1f237767e9bd271b61a2eaa752 Mon Sep 17 00:00:00 2001 From: Hoang Huu Le Date: Wed, 17 Jun 2020 13:56:05 +0700 Subject: tipc: update a binding service via broadcast Currently, updating binding table (add service binding to name table/withdraw a service binding) is being sent over replicast. However, if we are scaling up clusters to > 100 nodes/containers this method is less affection because of looping through nodes in a cluster one by one. It is worth to use broadcast to update a binding service. This way, the binding table can be updated on all peer nodes in one shot. Broadcast is used when all peer nodes, as indicated by a new capability flag TIPC_NAMED_BCAST, support reception of this message type. Four problems need to be considered when introducing this feature. 1) When establishing a link to a new peer node we still update this by a unicast 'bulk' update. This may lead to race conditions, where a later broadcast publication/withdrawal bypass the 'bulk', resulting in disordered publications, or even that a withdrawal may arrive before the corresponding publication. We solve this by adding an 'is_last_bulk' bit in the last bulk messages so that it can be distinguished from all other messages. Only when this message has arrived do we open up for reception of broadcast publications/withdrawals. 2) When a first legacy node is added to the cluster all distribution will switch over to use the legacy 'replicast' method, while the opposite happens when the last legacy node leaves the cluster. This entails another risk of message disordering that has to be handled. We solve this by adding a sequence number to the broadcast/replicast messages, so that disordering can be discovered and corrected. Note however that we don't need to consider potential message loss or duplication at this protocol level. 3) Bulk messages don't contain any sequence numbers, and will always arrive in order. Hence we must exempt those from the sequence number control and deliver them unconditionally. We solve this by adding a new 'is_bulk' bit in those messages so that they can be recognized. 4) Legacy messages, which don't contain any new bits or sequence numbers, but neither can arrive out of order, also need to be exempt from the initial synchronization and sequence number check, and delivered unconditionally. Therefore, we add another 'is_not_legacy' bit to all new messages so that those can be distinguished from legacy messages and the latter delivered directly. v1->v2: - fix warning issue reported by kbuild test robot - add santiy check to drop the publication message with a sequence number that is lower than the agreed synch point Signed-off-by: kernel test robot Signed-off-by: Hoang Huu Le Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index a4c2816c3746..030a51c4d1fa 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -75,6 +75,8 @@ struct tipc_bclink_entry { struct sk_buff_head arrvq; struct sk_buff_head inputq2; struct sk_buff_head namedq; + u16 named_rcv_nxt; + bool named_open; }; /** @@ -396,10 +398,10 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr); + tipc_publ_notify(net, publ_list, addr, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr); + tipc_named_node_up(net, addr, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { tipc_mon_peer_up(net, addr, bearer_id); @@ -1483,6 +1485,7 @@ static void node_lost_contact(struct tipc_node *n, /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); + __skb_queue_purge(&n->bc_entry.namedq); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { @@ -1729,12 +1732,23 @@ int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) return 0; } -void tipc_node_broadcast(struct net *net, struct sk_buff *skb) +void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests) { + struct sk_buff_head xmitq; struct sk_buff *txskb; struct tipc_node *n; + u16 dummy; u32 dst; + /* Use broadcast if all nodes support it */ + if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) { + __skb_queue_head_init(&xmitq); + __skb_queue_tail(&xmitq, skb); + tipc_bcast_xmit(net, &xmitq, &dummy); + return; + } + + /* Otherwise use legacy replicast method */ rcu_read_lock(); list_for_each_entry_rcu(n, tipc_nodes(net), list) { dst = n->addr; @@ -1749,7 +1763,6 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) tipc_node_xmit_skb(net, txskb, dst, 0); } rcu_read_unlock(); - kfree_skb(skb); } @@ -1844,7 +1857,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */ if (!skb_queue_empty(&n->bc_entry.namedq)) - tipc_named_rcv(net, &n->bc_entry.namedq); + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); /* If reassembly or retransmission failure => reset all links to peer */ if (rc & TIPC_LINK_DOWN_EVT) @@ -2114,7 +2129,9 @@ rcv: tipc_node_link_down(n, bearer_id, false); if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) - tipc_named_rcv(net, &n->bc_entry.namedq); + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1))) tipc_node_mcast_rcv(n); -- cgit v1.2.3 From d8141208b032eaee0efeacaadf1734f65db73ac5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:14 +0200 Subject: net: tipc: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Andrew Lunn Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- net/tipc/discover.c | 5 ++--- net/tipc/link.c | 6 +++--- net/tipc/msg.c | 2 +- net/tipc/node.c | 4 ++-- net/tipc/socket.c | 8 +++----- net/tipc/udp_media.c | 2 +- 7 files changed, 13 insertions(+), 16 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index e366ec9a7e4d..808b147df7d5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -595,7 +595,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface - * @buf: the received packet + * @skb: the received message * @dev: the net device that the packet was received on * @pt: the packet_type structure which was used to register this handler * @orig_dev: the original receive net device in case the device is a bond diff --git a/net/tipc/discover.c b/net/tipc/discover.c index bfe43da127c0..d4ecacddb40c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -74,7 +74,7 @@ struct tipc_discoverer { /** * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace - * @type: message type (request or response) + * @mtyp: message type (request or response) * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, @@ -339,7 +339,7 @@ exit: * @net: the applicable net namespace * @b: ptr to bearer issuing requests * @dest: destination address for request messages - * @dest_domain: network domain to which links can be established + * @skb: pointer to created frame * * Returns 0 if successful, otherwise -errno. */ @@ -393,7 +393,6 @@ void tipc_disc_delete(struct tipc_discoverer *d) * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace * @b: ptr to bearer issuing requests - * @dest_domain: network domain to which links can be established */ void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { diff --git a/net/tipc/link.c b/net/tipc/link.c index f1d9c33dae72..d46d4ee5c4fd 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -445,7 +445,7 @@ u32 tipc_link_state(struct tipc_link *l) /** * tipc_link_create - create a new link - * @n: pointer to associated node + * @net: pointer to associated network namespace * @if_name: associated interface name * @bearer_id: id (index) of associated bearer * @tolerance: link tolerance to be used by link @@ -530,7 +530,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /** * tipc_link_bc_create - create new link to be used for broadcast - * @n: pointer to associated node + * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers * @window: send window to be used * @inputq: queue to put messages ready for delivery @@ -989,7 +989,7 @@ void tipc_link_reset(struct tipc_link *l) /** * tipc_link_xmit(): enqueue buffer list according to queue situation - * @link: link to use + * @l: link to use * @list: chain of buffers containing message * @xmitq: returned list of packets to be sent by caller * diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 01b64869a173..848fae674532 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -202,7 +202,7 @@ err: /** * tipc_msg_append(): Append data to tail of an existing buffer queue - * @hdr: header to be used + * @_hdr: header to be used * @m: the data to be appended * @mss: max allowable size of buffer * @dlen: size of data to be appended diff --git a/net/tipc/node.c b/net/tipc/node.c index 030a51c4d1fa..4edcee3088da 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1515,7 +1515,7 @@ static void node_lost_contact(struct tipc_node *n, * tipc_node_get_linkname - get the name of a link * * @bearer_id: id of the bearer - * @node: peer node address + * @addr: peer node address * @linkname: link name output buffer * * Returns 0 on success @@ -2022,7 +2022,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, * tipc_rcv - process TIPC packets/messages arriving from off-node * @net: the applicable net namespace * @skb: TIPC packet - * @bearer: pointer to bearer message arrived on + * @b: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a94f38333698..fc388cef6471 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -711,7 +711,6 @@ exit: * tipc_getname - get port ID of socket or peer socket * @sock: socket structure * @uaddr: area for returned socket address - * @uaddr_len: area for returned length of socket address * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * * Returns 0 on success, errno otherwise @@ -1053,7 +1052,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /** * tipc_send_group_bcast - send message to all members in communication group - * @sk: socket structure + * @sock: socket structure * @m: message to send * @dlen: total length of message data * @timeout: timeout to wait for wakeup @@ -1673,7 +1672,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, /** * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @hdr: received message header + * @skb: received message * * Note: Address is not captured if not requested by receiver. */ @@ -2095,7 +2094,6 @@ static void tipc_write_space(struct sock *sk) /** * tipc_data_ready - wake up threads to indicate messages have been received * @sk: socket - * @len: the length of messages */ static void tipc_data_ready(struct sock *sk) { @@ -2677,7 +2675,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) /** * tipc_accept - wait for connection request * @sock: listening socket - * @newsock: new socket that is to be connected + * @new_sock: new socket that is to be connected * @flags: file-related flags associated with socket * * Returns 0 on success, errno otherwise diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 28a283f26a8d..d91b7c543e39 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -565,7 +565,7 @@ msg_full: /** * tipc_parse_udp_addr - build udp media address from netlink data - * @nlattr: netlink attribute containing sockaddr storage aligned address + * @nla: netlink attribute containing sockaddr storage aligned address * @addr: tipc media address to fill with address, port and protocol type * @scope_id: IPv6 scope id pointer, not NULL indicates it's required */ -- cgit v1.2.3