From 9d9b1ee0b2d1c9e02b2338c4a4b0a062d2d3edac Mon Sep 17 00:00:00 2001 From: Enke Chen Date: Fri, 15 Jan 2021 14:30:58 -0800 Subject: tcp: fix TCP_USER_TIMEOUT with zero window The TCP session does not terminate with TCP_USER_TIMEOUT when data remain untransmitted due to zero window. The number of unanswered zero-window probes (tcp_probes_out) is reset to zero with incoming acks irrespective of the window size, as described in tcp_probe_timer(): RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as long as the receiver continues to respond probes. We support this by default and reset icsk_probes_out with incoming ACKs. This counter, however, is the wrong one to be used in calculating the duration that the window remains closed and data remain untransmitted. Thanks to Jonathan Maxwell for diagnosing the actual issue. In this patch a new timestamp is introduced for the socket in order to track the elapsed time for the zero-window probes that have not been answered with any non-zero window ack. Fixes: 9721e709fa68 ("tcp: simplify window probe aborting on USER_TIMEOUT") Reported-by: William McCall Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Signed-off-by: Enke Chen Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20210115223058.GA39267@localhost.localdomain Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c7e16b0ed791f..bafcab75f4256 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3384,6 +3384,7 @@ static void tcp_ack_probe(struct sock *sk) return; if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) { icsk->icsk_backoff = 0; + icsk->icsk_probes_tstamp = 0; inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). * This function is not for random using! -- cgit v1.2.3 From 9c30ae8398b0813e237bde387d67a7f74ab2db2d Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 19 Jan 2021 11:26:19 -0800 Subject: tcp: fix TCP socket rehash stats mis-accounting The previous commit 32efcc06d2a1 ("tcp: export count for rehash attempts") would mis-account rehashing SNMP and socket stats: a. During handshake of an active open, only counts the first SYN timeout b. After handshake of passive and active open, stop updating after (roughly) TCP_RETRIES1 recurring RTOs c. After the socket aborts, over count timeout_rehash by 1 This patch fixes this by checking the rehash result from sk_rethink_txhash. Fixes: 32efcc06d2a1 ("tcp: export count for rehash attempts") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Link: https://lore.kernel.org/r/20210119192619.1848270-1-ycheng@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 17 ++++++++++++----- net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_timer.c | 22 ++++++++-------------- 3 files changed, 22 insertions(+), 22 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/sock.h b/include/net/sock.h index bdc4323ce53c9..129d200bccb46 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1921,10 +1921,13 @@ static inline void sk_set_txhash(struct sock *sk) sk->sk_txhash = net_tx_rndhash(); } -static inline void sk_rethink_txhash(struct sock *sk) +static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) + if (sk->sk_txhash) { sk_set_txhash(sk); + return true; + } + return false; } static inline struct dst_entry * @@ -1947,12 +1950,10 @@ sk_dst_get(struct sock *sk) return dst; } -static inline void dst_negative_advice(struct sock *sk) +static inline void __dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); - sk_rethink_txhash(sk); - if (dst && dst->ops->negative_advice) { ndst = dst->ops->negative_advice(dst); @@ -1964,6 +1965,12 @@ static inline void dst_negative_advice(struct sock *sk) } } +static inline void dst_negative_advice(struct sock *sk) +{ + sk_rethink_txhash(sk); + __dst_negative_advice(sk); +} + static inline void __sk_dst_set(struct sock *sk, struct dst_entry *dst) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bafcab75f4256..a7dfca0a38cd7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4397,10 +4397,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) * The receiver remembers and reflects via DSACKs. Leverage the * DSACK state and change the txhash to re-route speculatively. */ - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) { - sk_rethink_txhash(sk); + if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && + sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); - } } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 454732ecc8f33..faa92948441ba 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -219,14 +219,8 @@ static int tcp_write_timeout(struct sock *sk) int retry_until; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (icsk->icsk_retransmits) { - dst_negative_advice(sk); - } else { - sk_rethink_txhash(sk); - tp->timeout_rehash++; - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPTIMEOUTREHASH); - } + if (icsk->icsk_retransmits) + __dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; expired = icsk->icsk_retransmits >= retry_until; } else { @@ -234,12 +228,7 @@ static int tcp_write_timeout(struct sock *sk) /* Black hole detection */ tcp_mtu_probing(icsk, sk); - dst_negative_advice(sk); - } else { - sk_rethink_txhash(sk); - tp->timeout_rehash++; - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPTIMEOUTREHASH); + __dst_negative_advice(sk); } retry_until = net->ipv4.sysctl_tcp_retries2; @@ -270,6 +259,11 @@ static int tcp_write_timeout(struct sock *sk) return 1; } + if (sk_rethink_txhash(sk)) { + tp->timeout_rehash++; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); + } + return 0; } -- cgit v1.2.3