summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2023-10-23 09:35:02 +0100
committerDavid S. Miller <davem@davemloft.net>2023-10-23 09:35:02 +0100
commitbdf24b4bdfa59b124f9d0ff837f8d35a908da3b8 (patch)
treebe82eb8c966ea4a7efa7cd6ed0df9d8968ed9f5d /net/ipv4/tcp.c
parent35c1b273206346c4178928b1121675dc143e61d2 (diff)
parenta77a0f5c7f23a8a4981a2a3ff47baa91ceaf1f53 (diff)
Merge branch 'tcp-ts-usec-resolution'
Eric Dumazet says: ==================== tcp: add optional usec resolution to TCP TS As discussed in various public places in 2016, Google adopted usec resolution in RFC 7323 TS values, at Van Jacobson suggestion. Goals were : 1) better observability of delays in networking stacks/fabrics. 2) better disambiguation of events based on TSval/ecr values. 3) building block for congestion control modules needing usec resolution. Back then we implemented a schem based on private SYN options to safely negotiate the feature. For upstream submission, we chose to use a much simpler route attribute because this feature is probably going to be used in private networks. ip route add 10/8 ... features tcp_usec_ts References: https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf https://datatracker.ietf.org/doc/draft-wang-tcpm-low-latency-opt/ First two patches are fixing old minor bugs and might be taken by stable teams (thanks to appropriate Fixes: tags) ==================== Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c26
1 files changed, 18 insertions, 8 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 56a8d936000f..a86d8200a1e8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3629,10 +3629,16 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
tp->fastopen_no_cookie = val;
break;
case TCP_TIMESTAMP:
- if (!tp->repair)
+ if (!tp->repair) {
err = -EPERM;
- else
- WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
+ break;
+ }
+ /* val is an opaque field,
+ * and low order bit contains usec_ts enable bit.
+ * Its a best effort, and we do not care if user makes an error.
+ */
+ tp->tcp_usec_ts = val & 1;
+ WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts));
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
@@ -3754,6 +3760,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
if (tp->syn_data_acked)
info->tcpi_options |= TCPI_OPT_SYN_DATA;
+ if (tp->tcp_usec_ts)
+ info->tcpi_options |= TCPI_OPT_USEC_TS;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
@@ -3817,10 +3825,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_total_rto = tp->total_rto;
info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
info->tcpi_total_rto_time = tp->total_rto_time;
- if (tp->rto_stamp) {
- info->tcpi_total_rto_time += tcp_time_stamp_raw() -
- tp->rto_stamp;
- }
+ if (tp->rto_stamp)
+ info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp;
unlock_sock_fast(sk, slow);
}
@@ -4145,7 +4151,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
+ val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset);
+ if (tp->tcp_usec_ts)
+ val |= 1;
+ else
+ val &= ~1;
break;
case TCP_NOTSENT_LOWAT:
val = READ_ONCE(tp->notsent_lowat);