summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2021-01-09 18:18:47 -0800
committerJakub Kicinski <kuba@kernel.org>2021-01-09 18:18:47 -0800
commit49888961a45ac4092fd84cb4e548f45f5a42e74c (patch)
tree4342a9bf2e6bb8680455185983dc4a302707e9d0 /net
parent43b3983437ed4a3458591daf7777529cd72c8fd0 (diff)
parent718eb44e5c1e9594d6cebc1798a73c1a314de7e2 (diff)
Merge branch 'mptcp-add-mp_prio-support-and-rework-local-address-ids'
Mat Martineau says: ==================== MPTCP: Add MP_PRIO support and rework local address IDs Patches 1 and 2 rework the assignment of local address IDs to allow them to be assigned by a userspace path manager, and add corresponding self tests. Patches 2-8 add the ability to change subflow priority after a subflow has been established. Each subflow in a MPTCP connection has a priority level: "regular" or "backup". Data should only be sent on backup subflows if no regular subflows are available. The priority level can be set when the subflow connection is established (as was already implemented), or during the life of the connection by sending MP_PRIO in the TCP options (as added here). Self tests are included. ==================== Link: https://lore.kernel.org/r/20210109004802.341602-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/mptcp/mib.c2
-rw-r--r--net/mptcp/mib.h2
-rw-r--r--net/mptcp/options.c56
-rw-r--r--net/mptcp/pm.c8
-rw-r--r--net/mptcp/pm_netlink.c172
-rw-r--r--net/mptcp/protocol.h11
6 files changed, 233 insertions, 18 deletions
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index b921cbdd9aaa2..8ca196489893f 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -31,6 +31,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
+ SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
+ SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 47bcecce1106e..63914a5ef6a5d 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -24,6 +24,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
+ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
+ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e0d21c0607e53..c9643344a8d74 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -282,6 +282,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
break;
+ case MPTCPOPT_MP_PRIO:
+ if (opsize != TCPOLEN_MPTCP_PRIO)
+ break;
+
+ mp_opt->mp_prio = 1;
+ mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
+ pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
+ break;
+
case MPTCPOPT_MP_FASTCLOSE:
if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
break;
@@ -313,6 +322,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->port = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
+ mp_opt->mp_prio = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -679,6 +689,28 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
return true;
}
+static bool mptcp_established_options_mp_prio(struct sock *sk,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ if (!subflow->send_mp_prio)
+ return false;
+
+ if (remaining < TCPOLEN_MPTCP_PRIO)
+ return false;
+
+ *size = TCPOLEN_MPTCP_PRIO;
+ opts->suboptions |= OPTION_MPTCP_PRIO;
+ opts->backup = subflow->request_bkup;
+
+ pr_debug("prio=%d", opts->backup);
+
+ return true;
+}
+
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts)
@@ -721,6 +753,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
ret = true;
}
+ if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ ret = true;
+ }
+
return ret;
}
@@ -994,6 +1032,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mp_opt.rm_addr = 0;
}
+ if (mp_opt.mp_prio) {
+ mptcp_pm_mp_prio_received(sk, mp_opt.backup);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
+ mp_opt.mp_prio = 0;
+ }
+
if (!mp_opt.dss)
return;
@@ -1168,6 +1212,18 @@ mp_capable_done:
0, opts->rm_id);
}
+ if (OPTION_MPTCP_PRIO & opts->suboptions) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_prio = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
+ TCPOLEN_MPTCP_PRIO,
+ opts->backup, TCPOPT_NOP);
+ }
+
if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index da2ed576f2899..0a6ebd0642ec9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -207,6 +207,14 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
spin_unlock_bh(&pm->lock);
}
+void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
+ subflow->backup = bkup;
+}
+
/* path manager helpers */
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index a6d983d80576a..9b1f6298bbdba 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -36,6 +36,9 @@ struct mptcp_pm_add_entry {
u8 retrans_times;
};
+#define MAX_ADDR_ID 255
+#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
+
struct pm_nl_pernet {
/* protects pernet updates */
spinlock_t lock;
@@ -46,6 +49,7 @@ struct pm_nl_pernet {
unsigned int local_addr_max;
unsigned int subflows_max;
unsigned int next_id;
+ unsigned long id_bitmap[BITMAP_SZ];
};
#define MPTCP_PM_ADDR_MAX 8
@@ -438,6 +442,41 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
}
}
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ u8 bkup)
+{
+ struct mptcp_subflow_context *subflow;
+
+ pr_debug("bkup=%d", bkup);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_addr_info local;
+
+ local_address((struct sock_common *)ssk, &local);
+ if (!addresses_equal(&local, addr, addr->port))
+ continue;
+
+ subflow->backup = bkup;
+ subflow->send_mp_prio = 1;
+ subflow->request_bkup = bkup;
+ __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX);
+
+ spin_unlock_bh(&msk->pm.lock);
+ pr_debug("send ack for mp_prio");
+ lock_sock(ssk);
+ tcp_send_ack(ssk);
+ release_sock(ssk);
+ spin_lock_bh(&msk->pm.lock);
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
@@ -524,10 +563,12 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
/* to keep the code simple, don't do IDR-like allocation for address ID,
* just bail when we exceed limits
*/
- if (pernet->next_id > 255)
- goto out;
+ if (pernet->next_id == MAX_ADDR_ID)
+ pernet->next_id = 1;
if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
goto out;
+ if (test_bit(entry->addr.id, pernet->id_bitmap))
+ goto out;
/* do not insert duplicate address, differentiate on port only
* singled addresses
@@ -539,12 +580,30 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
goto out;
}
+ if (!entry->addr.id) {
+find_next:
+ entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
+ MAX_ADDR_ID + 1,
+ pernet->next_id);
+ if ((!entry->addr.id || entry->addr.id > MAX_ADDR_ID) &&
+ pernet->next_id != 1) {
+ pernet->next_id = 1;
+ goto find_next;
+ }
+ }
+
+ if (!entry->addr.id || entry->addr.id > MAX_ADDR_ID)
+ goto out;
+
+ __set_bit(entry->addr.id, pernet->id_bitmap);
+ if (entry->addr.id > pernet->next_id)
+ pernet->next_id = entry->addr.id;
+
if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
pernet->add_addr_signal_max++;
if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
pernet->local_addr_max++;
- entry->addr.id = pernet->next_id++;
pernet->addrs++;
list_add_tail_rcu(&entry->list, &pernet->local_addr_list);
ret = entry->addr.id;
@@ -597,6 +656,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
entry->addr = skc_local;
entry->addr.ifindex = 0;
entry->addr.flags = 0;
+ entry->addr.id = 0;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
kfree(entry);
@@ -857,6 +917,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
pernet->addrs--;
list_del_rcu(&entry->list);
+ __clear_bit(entry->addr.id, pernet->id_bitmap);
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
@@ -894,6 +955,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&pernet->lock);
list_splice_init(&pernet->local_addr_list, &free_list);
__reset_counters(pernet);
+ pernet->next_id = 1;
+ bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
__flush_addrs(sock_net(skb->sk), &free_list);
return 0;
@@ -994,27 +1057,34 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
struct pm_nl_pernet *pernet;
int id = cb->args[0];
void *hdr;
+ int i;
pernet = net_generic(net, pm_nl_pernet_id);
spin_lock_bh(&pernet->lock);
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (entry->addr.id <= id)
- continue;
-
- hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, &mptcp_genl_family,
- NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
- if (!hdr)
- break;
+ for (i = id; i < MAX_ADDR_ID + 1; i++) {
+ if (test_bit(i, pernet->id_bitmap)) {
+ entry = __lookup_addr_by_id(pernet, i);
+ if (!entry)
+ break;
+
+ if (entry->addr.id <= id)
+ continue;
+
+ hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &mptcp_genl_family,
+ NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
+ if (!hdr)
+ break;
+
+ if (mptcp_nl_fill_addr(msg, entry) < 0) {
+ genlmsg_cancel(msg, hdr);
+ break;
+ }
- if (mptcp_nl_fill_addr(msg, entry) < 0) {
- genlmsg_cancel(msg, hdr);
- break;
+ id = entry->addr.id;
+ genlmsg_end(msg, hdr);
}
-
- id = entry->addr.id;
- genlmsg_end(msg, hdr);
}
spin_unlock_bh(&pernet->lock);
@@ -1096,6 +1166,66 @@ fail:
return -EMSGSIZE;
}
+static int mptcp_nl_addr_backup(struct net *net,
+ struct mptcp_addr_info *addr,
+ u8 bkup)
+{
+ long s_slot = 0, s_num = 0;
+ struct mptcp_sock *msk;
+ int ret = -EINVAL;
+
+ while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+ struct sock *sk = (struct sock *)msk;
+
+ if (list_empty(&msk->conn_list))
+ goto next;
+
+ lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock(sk);
+
+next:
+ sock_put(sk);
+ cond_resched();
+ }
+
+ return ret;
+}
+
+static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+ struct mptcp_pm_addr_entry addr, *entry;
+ struct net *net = sock_net(skb->sk);
+ u8 bkup = 0;
+ int ret;
+
+ ret = mptcp_pm_parse_addr(attr, info, true, &addr);
+ if (ret < 0)
+ return ret;
+
+ if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if (addresses_equal(&entry->addr, &addr.addr, true)) {
+ ret = mptcp_nl_addr_backup(net, &entry->addr, bkup);
+ if (ret)
+ return ret;
+
+ if (bkup)
+ entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ }
+ }
+
+ return 0;
+}
+
static const struct genl_small_ops mptcp_pm_ops[] = {
{
.cmd = MPTCP_PM_CMD_ADD_ADDR,
@@ -1126,6 +1256,11 @@ static const struct genl_small_ops mptcp_pm_ops[] = {
.cmd = MPTCP_PM_CMD_GET_LIMITS,
.doit = mptcp_nl_cmd_get_limits,
},
+ {
+ .cmd = MPTCP_PM_CMD_SET_FLAGS,
+ .doit = mptcp_nl_cmd_set_flags,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family mptcp_genl_family __ro_after_init = {
@@ -1148,6 +1283,7 @@ static int __net_init pm_nl_init_net(struct net *net)
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
__reset_counters(pernet);
pernet->next_id = 1;
+ bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_lock_init(&pernet->lock);
return 0;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d67de793d363f..d6400ad2d6156 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -24,6 +24,7 @@
#define OPTION_MPTCP_ADD_ADDR6 BIT(7)
#define OPTION_MPTCP_RM_ADDR BIT(8)
#define OPTION_MPTCP_FASTCLOSE BIT(9)
+#define OPTION_MPTCP_PRIO BIT(10)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -59,6 +60,7 @@
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24
#define TCPOLEN_MPTCP_PORT_LEN 4
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
+#define TCPOLEN_MPTCP_PRIO 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
/* MPTCP MP_JOIN flags */
@@ -86,6 +88,9 @@
#define MPTCP_ADDR_IPVERSION_4 4
#define MPTCP_ADDR_IPVERSION_6 6
+/* MPTCP MP_PRIO flags */
+#define MPTCP_PRIO_BKUP BIT(0)
+
/* MPTCP socket flags */
#define MPTCP_DATA_READY 0
#define MPTCP_NOSPACE 1
@@ -116,6 +121,7 @@ struct mptcp_options_received {
dss : 1,
add_addr : 1,
rm_addr : 1,
+ mp_prio : 1,
family : 4,
echo : 1,
backup : 1;
@@ -396,6 +402,7 @@ struct mptcp_subflow_context {
map_valid : 1,
mpc_map : 1,
backup : 1,
+ send_mp_prio : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1; /* ctx can be free at ulp release time */
@@ -550,6 +557,10 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ u8 bkup);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,