Diffstat (limited to 'net/mptcp')
-rw-r--r--  net/mptcp/diag.c          |  2
-rw-r--r--  net/mptcp/mptcp_pm_gen.c  |  2
-rw-r--r--  net/mptcp/options.c       | 11
-rw-r--r--  net/mptcp/pm.c            |  3
-rw-r--r--  net/mptcp/pm_netlink.c    | 47
-rw-r--r--  net/mptcp/protocol.c      | 38
-rw-r--r--  net/mptcp/protocol.h      |  6
-rw-r--r--  net/mptcp/sched.c         |  2
-rw-r--r--  net/mptcp/subflow.c       | 17
9 files changed, 70 insertions, 58 deletions
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 2d3efb405437d..02205f7994d75 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
 		flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM;
 	if (sf->request_bkup)
 		flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC;
-	if (sf->fully_established)
+	if (READ_ONCE(sf->fully_established))
 		flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED;
 	if (sf->conn_finished)
 		flags |= MPTCP_SUBFLOW_FLAG_CONNECTED;
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index bfb37c5a88c4e..dcffd847af33f 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -14,7 +14,7 @@ const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = {
 	[MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, },
 	[MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, },
-	[MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, },
+	[MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_BE32, },
 	[MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16),
 	[MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, },
 	[MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, },
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 370c3836b7712..a62bc874bf1e1 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
 		return false;
 
 	/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
-	if (subflow->fully_established || snd_data_fin_enable ||
+	if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
 	    subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
 	    sk->sk_state != TCP_ESTABLISHED)
 		return false;
@@ -667,8 +667,15 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
 					      &echo, &drop_other_suboptions))
 		return false;
 
+	/*
+	 * Later on, mptcp_write_options() will enforce mutually exclusion with
+	 * DSS, bail out if such option is set and we can't drop it.
+	 */
 	if (drop_other_suboptions)
 		remaining += opt_size;
+	else if (opts->suboptions & OPTION_MPTCP_DSS)
+		return false;
+
 	len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port);
 	if (remaining < len)
 		return false;
@@ -930,7 +937,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
 	/* here we can process OoO, in-window pkts, only in-sequence 4th ack
 	 * will make the subflow fully established
 	 */
-	if (likely(subflow->fully_established)) {
+	if (likely(READ_ONCE(subflow->fully_established))) {
 		/* on passive sockets, check for 3rd ack retransmission
 		 * note that msk is always set by subflow_syn_recv_sock()
 		 * for mp_join subflows
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 620264c75dc2e..16c336c519403 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -154,6 +154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
 void mptcp_pm_connection_closed(struct mptcp_sock *msk)
 {
 	pr_debug("msk=%p\n", msk);
+
+	if (msk->token)
+		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
 }
 
 void mptcp_pm_subflow_established(struct mptcp_sock *msk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 45a2b5f05d38b..7a0f7998376a5 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -512,7 +512,8 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
 {
 	struct mptcp_pm_addr_entry *entry;
 
-	list_for_each_entry(entry, &pernet->local_addr_list, list) {
+	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
+				lockdep_is_held(&pernet->lock)) {
 		if (entry->addr.id == id)
 			return entry;
 	}
@@ -782,7 +783,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
 
 void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 {
-	struct mptcp_subflow_context *subflow;
+	struct mptcp_subflow_context *subflow, *alt = NULL;
 
 	msk_owned_by_me(msk);
 	lockdep_assert_held(&msk->pm.lock);
@@ -793,10 +794,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 
 	mptcp_for_each_subflow(msk, subflow) {
 		if (__mptcp_subflow_active(subflow)) {
-			mptcp_pm_send_ack(msk, subflow, false, false);
-			break;
+			if (!subflow->stale) {
+				mptcp_pm_send_ack(msk, subflow, false, false);
+				return;
+			}
+
+			if (!alt)
+				alt = subflow;
 		}
 	}
+
+	if (alt)
+		mptcp_pm_send_ack(msk, alt, false, false);
 }
 
 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -1134,17 +1143,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
 {
 	struct mptcp_pm_addr_entry *entry;
 	struct pm_nl_pernet *pernet;
-	int ret = -1;
+	int ret;
 
 	pernet = pm_nl_get_pernet_from_msk(msk);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
-		if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
-			ret = entry->addr.id;
-			break;
-		}
-	}
+	entry = __lookup_addr(pernet, skc);
+	ret = entry ? entry->addr.id : -1;
 	rcu_read_unlock();
 	if (ret >= 0)
 		return ret;
@@ -1171,15 +1176,11 @@ bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
 {
 	struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
 	struct mptcp_pm_addr_entry *entry;
-	bool backup = false;
+	bool backup;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
-		if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
-			backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
-			break;
-		}
-	}
+	entry = __lookup_addr(pernet, skc);
+	backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
 	rcu_read_unlock();
 
 	return backup;
@@ -1816,7 +1817,7 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	spin_lock_bh(&pernet->lock);
+	rcu_read_lock();
 	entry = __lookup_addr_by_id(pernet, addr.addr.id);
 	if (!entry) {
 		GENL_SET_ERR_MSG(info, "address not found");
@@ -1830,11 +1831,11 @@ int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info)
 
 	genlmsg_end(msg, reply);
 	ret = genlmsg_reply(msg, info);
-	spin_unlock_bh(&pernet->lock);
+	rcu_read_unlock();
 	return ret;
 
 unlock_fail:
-	spin_unlock_bh(&pernet->lock);
+	rcu_read_unlock();
 
 fail:
 	nlmsg_free(msg);
@@ -1858,7 +1859,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
 
 	pernet = pm_nl_get_pernet(net);
 
-	spin_lock_bh(&pernet->lock);
+	rcu_read_lock();
 	for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
 		if (test_bit(i, pernet->id_bitmap)) {
 			entry = __lookup_addr_by_id(pernet, i);
@@ -1883,7 +1884,7 @@ int mptcp_pm_nl_dump_addr(struct sk_buff *msg,
 			genlmsg_end(msg, hdr);
 		}
 	}
-	spin_unlock_bh(&pernet->lock);
+	rcu_read_unlock();
 	cb->args[0] = id;
 
 	return msg->len;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 48d480982b787..1b2e7cbb577fc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
 	int delta;
 
 	if (MPTCP_SKB_CB(from)->offset ||
+	    ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
 	    !skb_try_coalesce(to, from, &fragstolen, &delta))
 		return false;
 
@@ -528,13 +529,13 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
 		mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow));
 }
 
-static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk, int copied)
 {
 	bool slow;
 
 	slow = lock_sock_fast(ssk);
 	if (tcp_can_send_ack(ssk))
-		tcp_cleanup_rbuf(ssk, 1);
+		tcp_cleanup_rbuf(ssk, copied);
 	unlock_sock_fast(ssk, slow);
 }
 
@@ -551,7 +552,7 @@ static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
 		      (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
 }
 
-static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
+static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied)
 {
 	int old_space = READ_ONCE(msk->old_wspace);
 	struct mptcp_subflow_context *subflow;
@@ -559,14 +560,14 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
 	int space = __mptcp_space(sk);
 	bool cleanup, rx_empty;
 
-	cleanup = (space > 0) && (space >= (old_space << 1));
-	rx_empty = !__mptcp_rmem(sk);
+	cleanup = (space > 0) && (space >= (old_space << 1)) && copied;
+	rx_empty = !__mptcp_rmem(sk) && copied;
 
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
 		if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
-			mptcp_subflow_cleanup_rbuf(ssk);
+			mptcp_subflow_cleanup_rbuf(ssk, copied);
 	}
 }
 
@@ -1939,6 +1940,8 @@ do_error:
 	goto out;
 }
 
+static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
+
 static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 				struct msghdr *msg,
 				size_t len, int flags,
@@ -1992,6 +1995,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 			break;
 	}
 
+	mptcp_rcv_space_adjust(msk, copied);
 	return copied;
 }
 
@@ -2217,9 +2221,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		copied += bytes_read;
 
-		/* be sure to advertise window change */
-		mptcp_cleanup_rbuf(msk);
-
 		if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
 			continue;
 
@@ -2268,7 +2269,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		}
 
 		pr_debug("block timeout %ld\n", timeo);
-		mptcp_rcv_space_adjust(msk, copied);
+		mptcp_cleanup_rbuf(msk, copied);
 		err = sk_wait_data(sk, &timeo, NULL);
 		if (err < 0) {
 			err = copied ? : err;
@@ -2276,7 +2277,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		}
 	}
 
-	mptcp_rcv_space_adjust(msk, copied);
+	mptcp_cleanup_rbuf(msk, copied);
 
 out_err:
 	if (cmsg_flags && copied >= 0) {
@@ -2728,8 +2729,8 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
 	if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
 		return;
 
-	close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
-			mptcp_close_timeout(sk);
+	close_timeout = (unsigned long)inet_csk(sk)->icsk_mtup.probe_timestamp -
+			tcp_jiffies32 + jiffies + mptcp_close_timeout(sk);
 
 	/* the close timeout takes precedence on the fail one, and here at least one of
 	 * them is active
@@ -3147,8 +3148,7 @@ cleanup:
 	sock_hold(sk);
 	pr_debug("msk=%p state=%d\n", sk, sk->sk_state);
-	if (msk->token)
-		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+	mptcp_pm_connection_closed(msk);
 
 	if (sk->sk_state == TCP_CLOSE) {
 		__mptcp_destroy_sock(sk);
@@ -3214,8 +3214,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	mptcp_stop_rtx_timer(sk);
 	mptcp_stop_tout_timer(sk);
 
-	if (msk->token)
-		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
+	mptcp_pm_connection_closed(msk);
 
 	/* msk->subflow is still intact, the following will not free the first
 	 * subflow
@@ -3519,7 +3518,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
 	struct tcp_sock *tp = tcp_sk(ssk);
 	unsigned long timeout;
 
-	if (mptcp_subflow_ctx(ssk)->fully_established)
+	if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established))
 		return;
 
 	/* reschedule with a timeout above RTT, as we must look only for drop */
@@ -3530,7 +3529,8 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
 	timeout += jiffies;
 
 	WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
-	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+	smp_store_release(&icsk->icsk_ack.pending,
+			  icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER);
 	icsk->icsk_ack.timeout = timeout;
 	sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
 }
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 568a72702b080..a93e661ef5c43 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -513,7 +513,6 @@ struct mptcp_subflow_context {
 		request_bkup : 1,
 		mp_capable : 1,	    /* remote is MPTCP capable */
 		mp_join : 1,	    /* remote is JOINing */
-		fully_established : 1,	    /* path validated */
 		pm_notified : 1,    /* PM hook called for established status */
 		conn_finished : 1,
 		map_valid : 1,
@@ -532,10 +531,11 @@ struct mptcp_subflow_context {
 		is_mptfo : 1,	    /* subflow is doing TFO */
 		close_event_done : 1,	    /* has done the post-closed part */
 		mpc_drop : 1,	    /* the MPC option has been dropped in a rtx */
-		__unused : 8;
+		__unused : 9;
 	bool	data_avail;
 	bool	scheduled;
 	bool	pm_listener;	    /* a listener managed by the kernel PM? */
+	bool	fully_established;  /* path validated */
 	u32	remote_nonce;
 	u64	thmac;
 	u32	local_nonce;
@@ -780,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk)
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
 	/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
-	if (subflow->request_join && !subflow->fully_established)
+	if (subflow->request_join && !READ_ONCE(subflow->fully_established))
 		return false;
 
 	return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 78ed508ebc1b8..df7dbcfa3b713 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -60,7 +60,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
 	size_t offs = 0;
 
 	rcu_read_lock();
-	spin_lock(&mptcp_sched_list_lock);
 	list_for_each_entry_rcu(sched, &mptcp_sched_list, list) {
 		offs += snprintf(buf + offs, maxlen - offs,
 				 "%s%s",
@@ -69,7 +68,6 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
 		if (WARN_ON_ONCE(offs >= maxlen))
 			break;
 	}
-	spin_unlock(&mptcp_sched_list_lock);
 	rcu_read_unlock();
 }
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6170f2fff71e4..fd021cf8286ef 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
 				       const struct mptcp_options_received *mp_opt)
 {
 	subflow_set_remote_key(msk, subflow, mp_opt);
-	subflow->fully_established = 1;
+	WRITE_ONCE(subflow->fully_established, true);
 	WRITE_ONCE(msk->fully_established, true);
 
 	if (subflow->is_mptfo)
@@ -971,7 +971,8 @@ enum mapping_status {
 	MAPPING_EMPTY,
 	MAPPING_DATA_FIN,
 	MAPPING_DUMMY,
-	MAPPING_BAD_CSUM
+	MAPPING_BAD_CSUM,
+	MAPPING_NODSS
 };
 
 static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -1128,8 +1129,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 			return MAPPING_EMPTY;
 		}
 
+		/* If the required DSS has likely been dropped by a middlebox */
 		if (!subflow->map_valid)
-			return MAPPING_INVALID;
+			return MAPPING_NODSS;
 
 		goto validate_seq;
 	}
@@ -1343,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		status = get_mapping_status(ssk, msk);
 		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
 		if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
-			     status == MAPPING_BAD_CSUM))
+			     status == MAPPING_BAD_CSUM || status == MAPPING_NODSS))
 			goto fallback;
 
 		if (status != MAPPING_OK)
@@ -1396,7 +1398,9 @@ fallback:
 		 * subflow_error_report() will introduce the appropriate barriers
 		 */
 		subflow->reset_transient = 0;
-		subflow->reset_reason = MPTCP_RST_EMPTCP;
+		subflow->reset_reason = status == MAPPING_NODSS ?
+					MPTCP_RST_EMIDDLEBOX :
+					MPTCP_RST_EMPTCP;
 
 reset:
 		WRITE_ONCE(ssk->sk_err, EBADMSG);
@@ -2045,7 +2049,6 @@ static void subflow_ulp_clone(const struct request_sock *req,
 	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
 	new_ctx->tcp_error_report = old_ctx->tcp_error_report;
 	new_ctx->rel_write_seq = 1;
-	new_ctx->tcp_sock = newsk;
 
 	if (subflow_req->mp_capable) {
 		/* see comments in subflow_syn_recv_sock(), MPTCP connection
@@ -2062,7 +2065,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
 	} else if (subflow_req->mp_join) {
 		new_ctx->ssn_offset = subflow_req->ssn_offset;
 		new_ctx->mp_join = 1;
-		new_ctx->fully_established = 1;
+		WRITE_ONCE(new_ctx->fully_established, true);
 		new_ctx->remote_key_valid = 1;
 		new_ctx->backup = subflow_req->backup;
 		new_ctx->request_bkup = subflow_req->request_bkup;
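
Editor's note on the recurring fully_established hunks: READ_ONCE()/WRITE_ONCE() need an addressable object, and a C bitfield has no address, which is presumably why the flag moves out of the bitfield block in protocol.h into a plain bool. A minimal self-contained sketch of the resulting pattern (simplified stand-in types and macros, not the actual net/mptcp structures):

	#include <stdbool.h>

	/* Stand-ins for the kernel's READ_ONCE()/WRITE_ONCE() so this
	 * fragment compiles on its own; in-tree code uses the real macros.
	 */
	#define READ_ONCE(x)		(*(const volatile typeof(x) *)&(x))
	#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

	struct subflow_sketch {
		unsigned long	request_join : 1,
				mp_capable : 1;	/* bitfields: &sf->mp_capable
						 * does not compile, so no
						 * READ_ONCE() is possible */
		bool		fully_established;	/* plain bool: annotated
							 * lockless access works */
	};

	/* single-writer side */
	static void subflow_set_established(struct subflow_sketch *sf)
	{
		WRITE_ONCE(sf->fully_established, true);
	}

	/* lockless reader side, e.g. the diag and option-writing paths above */
	static bool subflow_is_established(const struct subflow_sketch *sf)
	{
		return READ_ONCE(sf->fully_established);
	}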
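
The pm_netlink.c hunks move the get/dump paths from pernet->lock to RCU. The extra cond argument to list_for_each_entry_rcu() is what lets one helper serve both sides: readers call it under rcu_read_lock(), while writers may still call it with the pernet spinlock held, and lockdep accepts either. A kernel-style sketch of that shape, with illustrative names rather than the exact MPTCP types (it only builds in-tree):

	#include <linux/list.h>
	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>

	struct addr_entry {
		struct list_head	list;
		unsigned int		id;
	};

	struct addr_pernet {
		struct list_head	local_addr_list;
		spinlock_t		lock;	/* serializes writers */
	};

	/* Legal under rcu_read_lock() *or* with pernet->lock held: the last
	 * argument tells lockdep that holding the writer lock is also fine.
	 */
	static struct addr_entry *lookup_addr_by_id(struct addr_pernet *pernet,
						    unsigned int id)
	{
		struct addr_entry *entry;

		list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
					lockdep_is_held(&pernet->lock)) {
			if (entry->id == id)
				return entry;
		}
		return NULL;
	}

The visible payoff in mptcp_pm_nl_get_addr()/mptcp_pm_nl_dump_addr() is that building and sending the netlink reply no longer happens under a BH-disabling spinlock, only under an RCU read-side critical section.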
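
The mptcp_pm_nl_addr_send_ack() change turns "ack on the first active subflow" into "prefer an active, non-stale subflow; keep the first stale one as a fallback". The selection logic, reduced to a standalone, runnable sketch with simplified types and a hypothetical helper name:

	#include <stdbool.h>
	#include <stddef.h>

	struct sf {
		bool	active;	/* __mptcp_subflow_active() in the real code */
		bool	stale;
	};

	/* Return the subflow to ack on: the first active non-stale one if any,
	 * otherwise fall back to the first active-but-stale one.
	 */
	static struct sf *pick_ack_subflow(struct sf *subflows, size_t n)
	{
		struct sf *alt = NULL;
		size_t i;

		for (i = 0; i < n; i++) {
			struct sf *cur = &subflows[i];

			if (!cur->active)
				continue;
			if (!cur->stale)
				return cur;	/* best candidate: stop here */
			if (!alt)
				alt = cur;	/* remember last-resort fallback */
		}
		return alt;	/* may be NULL: nothing usable */
	}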