summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/diag.c2
-rw-r--r--net/mptcp/mptcp_pm_gen.c2
-rw-r--r--net/mptcp/options.c11
-rw-r--r--net/mptcp/pm.c3
-rw-r--r--net/mptcp/pm_netlink.c47
-rw-r--r--net/mptcp/protocol.c38
-rw-r--r--net/mptcp/protocol.h6
-rw-r--r--net/mptcp/sched.c2
-rw-r--r--net/mptcp/subflow.c17
9 files changed, 70 insertions, 58 deletions
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 2d3efb405437d..02205f7994d75 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM;
if (sf->request_bkup)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC;
- if (sf->fully_established)
+ if (READ_ONCE(sf->fully_established))
flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED;
if (sf->conn_finished)
flags |= MPTCP_SUBFLOW_FLAG_CONNECTED;
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index bfb37c5a88c4e..dcffd847af33f 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -14,7 +14,7 @@
const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = {
[MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
[MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
- [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
+ [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_BE32, },
[MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16),
[MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, },
[MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, },
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 370c3836b7712..a62bc874bf1e1 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
return false;
/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
- if (subflow->fully_established || snd_data_fin_enable ||
+ if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
sk->sk_state != TCP_ESTABLISHED)
return false;
@@ -667,8 +667,15 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
&echo, &drop_other_suboptions))
return false;
+ /*
+ * Later on, mptcp_write_options() will enforce mutually exclusion with
+ * DSS, bail out if such option is set and we can't drop it.
+ */
if (drop_other_suboptions)
remaining += opt_size;
+ else if (opts->suboptions & OPTION_MPTCP_DSS)
+ return false;
+
len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port);
if (remaining < len)
return false;
@@ -930,7 +937,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* here we can process OoO, in-window pkts, only in-sequence 4th ack
* will make the subflow fully established
*/
- if (likely(subflow->fully_established)) {
+ if (likely(READ_ONCE(subflow->fully_established))) {
/* on passive sockets, check for 3rd ack retransmission
* note that msk is always set by subflow_syn_recv_sock()
* for mp_join subflows
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 620264c75dc2e..16c336c519403 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -154,6 +154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
pr_debug("msk=%p\n", msk);
+
+ if (msk->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 45a2b5f05d38b..7a0f7998376a5 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -512,7 +512,8 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
+ lockdep_is_held(&pernet->lock)) {
if (entry->addr.id == id)
return entry;
}
@@ -782,7 +783,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
{
- struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow, *alt = NULL;
msk_owned_by_me(msk);
lockdep_assert_held(&msk->pm.lock);
@@ -793,10 +794,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
if (__mptcp_subflow_active(subflow)) {
- mptcp_pm_send_ack(msk, subflow, false, false);
- break;
+ if (!subflow->stale) {
+ mptcp_pm_send_ack(msk, subflow, false, false);
+ return;
+ }
+
+ if (!alt)
+ alt = subflow;
}
}
+
+ if (alt)
+ mptcp_pm_send_ack(msk, alt, false, false);
}
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -1134,17 +1143,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
{
struct mptcp_pm_addr_entry *entry;
struct pm_nl_pernet *pernet;
- int ret = -1;
+ int ret;
pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
- list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
- ret = entry->addr.id;
- break;
- }
- }
+ entry = __lookup_addr(pernet, skc);
+ ret = entry ? entry->addr.id : -1;
rcu_read_unlock();
if (ret >= 0)
return ret;
@@ -1171,15 +1176,11 @@ bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
struct mptcp_pm_addr_entry *entry;
- bool backup = false;
+ bool backup;
rcu_read_lock();
- list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
- backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
- break;
- }
- }
+ entry = __lookup_addr(pernet, skc);
+ backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
rcu_read_unlock();
return backup;
@@ -1816,7 +1817,7 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
goto fail;
}
- spin_lock_bh(&pernet->lock);
+ rcu_read_lock();
entry = __lookup_addr_by_id(pernet, addr.addr.id);
if (!entry) {
GENL_SET_ERR_MSG(info, "address not found");
@@ -1830,11 +1831,11 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(msg, reply);
ret = genlmsg_reply(msg, info);
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
return ret;
unlock_fail:
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
fail:
nlmsg_free(msg);
@@ -1858,7 +1859,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
pernet = pm_nl_get_pernet(net);
- spin_lock_bh(&pernet->lock);
+ rcu_read_lock();
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
if (test_bit(i, pernet->id_bitmap)) {
entry = __lookup_addr_by_id(pernet, i);
@@ -1883,7 +1884,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
genlmsg_end(msg, hdr);
}
}
- spin_unlock_bh(&pernet->lock);
+ rcu_read_unlock();
cb->args[0] = id;
return msg->len;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 48d480982b787..1b2e7cbb577fc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
int delta;
if (MPTCP_SKB_CB(from)->offset ||
+ ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
!skb_try_coalesce(to, from, &fragstolen, &delta))
return false;
@@ -528,13 +529,13 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow));
}
-static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk, int copied)
{
bool slow;
slow = lock_sock_fast(ssk);
if (tcp_can_send_ack(ssk))
- tcp_cleanup_rbuf(ssk, 1);
+ tcp_cleanup_rbuf(ssk, copied);
unlock_sock_fast(ssk, slow);
}
@@ -551,7 +552,7 @@ static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
(ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
-static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
+static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied)
{
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
@@ -559,14 +560,14 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
int space = __mptcp_space(sk);
bool cleanup, rx_empty;
- cleanup = (space > 0) && (space >= (old_space << 1));
- rx_empty = !__mptcp_rmem(sk);
+ cleanup = (space > 0) && (space >= (old_space << 1)) && copied;
+ rx_empty = !__mptcp_rmem(sk) && copied;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
- mptcp_subflow_cleanup_rbuf(ssk);
+ mptcp_subflow_cleanup_rbuf(ssk, copied);
}
}
@@ -1939,6 +1940,8 @@ do_error:
goto out;
}
+static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
+
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
size_t len, int flags,
@@ -1992,6 +1995,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
break;
}
+ mptcp_rcv_space_adjust(msk, copied);
return copied;
}
@@ -2217,9 +2221,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
copied += bytes_read;
- /* be sure to advertise window change */
- mptcp_cleanup_rbuf(msk);
-
if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
continue;
@@ -2268,7 +2269,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld\n", timeo);
- mptcp_rcv_space_adjust(msk, copied);
+ mptcp_cleanup_rbuf(msk, copied);
err = sk_wait_data(sk, &timeo, NULL);
if (err < 0) {
err = copied ? : err;
@@ -2276,7 +2277,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
}
- mptcp_rcv_space_adjust(msk, copied);
+ mptcp_cleanup_rbuf(msk, copied);
out_err:
if (cmsg_flags && copied >= 0) {
@@ -2728,8 +2729,8 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
- mptcp_close_timeout(sk);
+ close_timeout = (unsigned long)inet_csk(sk)->icsk_mtup.probe_timestamp -
+ tcp_jiffies32 + jiffies + mptcp_close_timeout(sk);
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
@@ -3147,8 +3148,7 @@ cleanup:
sock_hold(sk);
pr_debug("msk=%p state=%d\n", sk, sk->sk_state);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
@@ -3214,8 +3214,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
- if (msk->token)
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+ mptcp_pm_connection_closed(msk);
/* msk->subflow is still intact, the following will not free the first
* subflow
@@ -3519,7 +3518,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
struct tcp_sock *tp = tcp_sk(ssk);
unsigned long timeout;
- if (mptcp_subflow_ctx(ssk)->fully_established)
+ if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established))
return;
/* reschedule with a timeout above RTT, as we must look only for drop */
@@ -3530,7 +3529,8 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
timeout += jiffies;
WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+ smp_store_release(&icsk->icsk_ack.pending,
+ icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER);
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 568a72702b080..a93e661ef5c43 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -513,7 +513,6 @@ struct mptcp_subflow_context {
request_bkup : 1,
mp_capable : 1, /* remote is MPTCP capable */
mp_join : 1, /* remote is JOINing */
- fully_established : 1, /* path validated */
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
@@ -532,10 +531,11 @@ struct mptcp_subflow_context {
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
mpc_drop : 1, /* the MPC option has been dropped in a rtx */
- __unused : 8;
+ __unused : 9;
bool data_avail;
bool scheduled;
bool pm_listener; /* a listener managed by the kernel PM? */
+ bool fully_established; /* path validated */
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -780,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk)
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
- if (subflow->request_join && !subflow->fully_established)
+ if (subflow->request_join && !READ_ONCE(subflow->fully_established))
return false;
return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 78ed508ebc1b8..df7dbcfa3b713 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -60,7 +60,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
size_t offs = 0;
rcu_read_lock();
- spin_lock(&mptcp_sched_list_lock);
list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
@@ -69,7 +68,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
if (WARN_ON_ONCE(offs >= maxlen))
break;
}
- spin_unlock(&mptcp_sched_list_lock);
rcu_read_unlock();
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6170f2fff71e4..fd021cf8286ef 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
const struct mptcp_options_received *mp_opt)
{
subflow_set_remote_key(msk, subflow, mp_opt);
- subflow->fully_established = 1;
+ WRITE_ONCE(subflow->fully_established, true);
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
@@ -971,7 +971,8 @@ enum mapping_status {
MAPPING_EMPTY,
MAPPING_DATA_FIN,
MAPPING_DUMMY,
- MAPPING_BAD_CSUM
+ MAPPING_BAD_CSUM,
+ MAPPING_NODSS
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -1128,8 +1129,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
return MAPPING_EMPTY;
}
+ /* If the required DSS has likely been dropped by a middlebox */
if (!subflow->map_valid)
- return MAPPING_INVALID;
+ return MAPPING_NODSS;
goto validate_seq;
}
@@ -1343,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
- status == MAPPING_BAD_CSUM))
+ status == MAPPING_BAD_CSUM || status == MAPPING_NODSS))
goto fallback;
if (status != MAPPING_OK)
@@ -1396,7 +1398,9 @@ fallback:
* subflow_error_report() will introduce the appropriate barriers
*/
subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
+ subflow->reset_reason = status == MAPPING_NODSS ?
+ MPTCP_RST_EMIDDLEBOX :
+ MPTCP_RST_EMPTCP;
reset:
WRITE_ONCE(ssk->sk_err, EBADMSG);
@@ -2045,7 +2049,6 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
- new_ctx->tcp_sock = newsk;
if (subflow_req->mp_capable) {
/* see comments in subflow_syn_recv_sock(), MPTCP connection
@@ -2062,7 +2065,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
- new_ctx->fully_established = 1;
+ WRITE_ONCE(new_ctx->fully_established, true);
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->request_bkup = subflow_req->request_bkup;