summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-07-21 17:48:36 -0700
committerJakub Kicinski <kuba@kernel.org>2025-07-21 17:48:36 -0700
commite8c24e23c4c9f515b02dde01de9ec2c945b2fddc (patch)
tree3ffc1f020184b13f91b038aaa7a1a8accb8a661c
parent1b02c861714bf28814926d1fcb3c5594de960757 (diff)
parent154e56a77d81ad49eb979aa64463d5fcfad51136 (diff)
Merge branch 'mptcp-add-tcp_maxseg-sockopt-support'
Matthieu Baerts says: ==================== mptcp: add TCP_MAXSEG sockopt support The TCP_MAXSEG socket option was not supported by MPTCP, mainly because it has never been requested before. But there are still valid use-cases, e.g. with HAProxy. - Patch 1 is a small cleanup patch in the MPTCP sockopt file. - Patch 2 expose some code from TCP, to avoid duplicating it in MPTCP. - Patch 3 adds TCP_MAXSEG sockopt support in MPTCP. - Patch 4 is not related to the others, it fixes a typo in a comment. Note that the new TCP_MAXSEG sockopt support has been validated by a new packetdrill script on the MPTCP CI: https://github.com/multipath-tcp/packetdrill/pull/161 v1: https://lore.kernel.org/20250716-net-next-mptcp-tcp_maxseg-v1-0-548d3a5666f6@kernel.org ==================== Link: https://patch.msgid.link/20250719-net-next-mptcp-tcp_maxseg-v2-0-8c910fbc5307@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/linux/tcp.h1
-rw-r--r--net/ipv4/tcp.c23
-rw-r--r--net/mptcp/protocol.c2
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/sockopt.c33
5 files changed, 46 insertions, 14 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1a5737b3753d..57e478bfaef2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -621,6 +621,7 @@ void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
int tcp_sock_set_user_timeout(struct sock *sk, int val);
+int tcp_sock_set_maxseg(struct sock *sk, int val);
static inline bool dst_tcp_usec_ts(const struct dst_entry *dst)
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 31149a0ac849..71a956fbfc55 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3751,6 +3751,19 @@ int tcp_set_window_clamp(struct sock *sk, int val)
return 0;
}
+int tcp_sock_set_maxseg(struct sock *sk, int val)
+{
+ /* Values greater than interface MTU won't take effect. However
+ * at the point when this call is done we typically don't yet
+ * know which interface is going to be used
+ */
+ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW))
+ return -EINVAL;
+
+ tcp_sk(sk)->rx_opt.user_mss = val;
+ return 0;
+}
+
/*
* Socket option code for TCP.
*/
@@ -3883,15 +3896,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case TCP_MAXSEG:
- /* Values greater than interface MTU won't take effect. However
- * at the point when this call is done we typically don't yet
- * know which interface is going to be used
- */
- if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
- err = -EINVAL;
- break;
- }
- tp->rx_opt.user_mss = val;
+ err = tcp_sock_set_maxseg(sk, val);
break;
case TCP_NODELAY:
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2ad1c41e963e..6c448a0be949 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1387,7 +1387,7 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
* - estimate the faster flow linger time
* - use the above to estimate the amount of byte transferred
* by the faster flow
- * - check that the amount of queued data is greter than the above,
+ * - check that the amount of queued data is greater than the above,
* otherwise do not use the picked, slower, subflow
* We select the subflow with the shorter estimated time to flush
* the queued mem, which basically ensure the above. We just need
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6ec245fd2778..1a32edf6f343 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -326,6 +326,7 @@ struct mptcp_sock {
int keepalive_cnt;
int keepalive_idle;
int keepalive_intvl;
+ int maxseg;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 3caa0a9d3b38..2c267aff95be 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -798,6 +798,23 @@ unlock:
return ret;
}
+static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
+ int optname, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ int ret = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -859,6 +876,11 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
&msk->keepalive_cnt,
val);
break;
+ case TCP_MAXSEG:
+ msk->maxseg = val;
+ ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval,
+ optlen);
+ break;
default:
ret = -ENOPROTOOPT;
}
@@ -914,10 +936,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
lock_sock(sk);
ssk = msk->first;
- if (ssk) {
- ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
- goto out;
- }
+ if (ssk)
+ goto get;
ssk = __mptcp_nmpc_sk(msk);
if (IS_ERR(ssk)) {
@@ -925,6 +945,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
goto out;
}
+get:
ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
out:
@@ -1407,6 +1428,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
case TCP_IS_MPTCP:
return mptcp_put_int_option(msk, optval, optlen, 1);
+ case TCP_MAXSEG:
+ return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
+ optval, optlen);
}
return -EOPNOTSUPP;
}
@@ -1553,6 +1577,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
+ tcp_sock_set_maxseg(ssk, msk->maxseg);
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));