diff options
author | David S. Miller <davem@davemloft.net> | 2021-08-18 10:10:01 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-08-18 10:10:01 +0100 |
commit | 606befcd5db4c2493d4b7db9d186eb3e881c483c (patch) | |
tree | e487836c8362f6c49bc0e28a6c34a7cc8b0366a3 | |
parent | 3349d3625d62e4c0d90e854a5b7e8efda8a5c994 (diff) | |
parent | f7713dd5d23a1fbb8758fca09847906c62774277 (diff) |
Merge branch 'mptcp-mesh-path-manager'
Mat Martineau says:
====================
mptcp: Add full mesh path manager option
The path manager in MPTCP controls the creation of additional subflows
after the initial connection is created. As each peer advertises
available endpoints with the ADD_ADDR MPTCP option, the recipient of
those advertisements must decide which subflows to create from the known
local and remote interfaces that are available for use by MPTCP.
The existing in-kernel path manager will create one additional subflow
when an ADD_ADDR is received, or a local address is newly configured for
MPTCP use. The maximum number of subflows has a configurable limit.
This patch set adds a MPTCP_PM_ADDR_FLAG_FULLMESH flag to the MPTCP
netlink API that enables subflows to be created more aggressively. When
an ADD_ADDR is received from a peer, new subflows are created between
that address/port and all local addresses configured for MPTCP.
Similarly, when a new local address is newly configured for use by
MPTCP, new subflows are created between that local address and all known
remote addresses for that MPTCP connection. The configurable limit on
the number of subflows still applies. If the new flag is not used the
path manager behavior is unchanged.
Patch 1 adds a helper function and refactors another function to prepare
for the rest of the patch series.
Patches 2 and 3 add two mesh connection capabilities: initiating
subflows based on added local addresses, or reacting to incoming
advertisements.
Patches 4-6 add full mesh cases to the self tests.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/uapi/linux/mptcp.h | 1 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 154 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 5 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 7 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_join.sh | 74 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 16 |
6 files changed, 231 insertions, 26 deletions
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 7b05f7102321a..f66038b9551fa 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -73,6 +73,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) +#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) enum { MPTCP_PM_CMD_UNSPEC, diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ac0aa6faacfa4..6e3df62a87d23 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -410,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } +static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr, + struct mptcp_addr_info *addr) +{ + int i; + + for (i = 0; i < nr; i++) { + if (addresses_equal(&addrs[i], addr, addr->port)) + return true; + } + + return false; +} + +/* Fill all the remote addresses into the array addrs[], + * and return the array size. + */ +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, + struct mptcp_addr_info *addrs) +{ + struct sock *sk = (struct sock *)msk, *ssk; + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info remote = { 0 }; + unsigned int subflows_max; + int i = 0; + + subflows_max = mptcp_pm_get_subflows_max(msk); + + /* Non-fullmesh endpoint, fill in the single entry + * corresponding to the primary MPC subflow remote address + */ + if (!fullmesh) { + remote_address((struct sock_common *)sk, &remote); + msk->pm.subflows++; + addrs[i++] = remote; + } else { + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + remote_address((struct sock_common *)ssk, &remote); + if (!lookup_address_in_vec(addrs, i, &remote) && + msk->pm.subflows < subflows_max) { + msk->pm.subflows++; + addrs[i++] = remote; + } + } + } + + return i; +} + static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -455,15 +504,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) !READ_ONCE(msk->pm.remote_deny_join_id0)) { local = select_local_address(pernet, msk); if (local) { - struct mptcp_addr_info remote = { 0 }; + bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; + int i, nr; msk->pm.local_addr_used++; - msk->pm.subflows++; check_work_pending(msk); - remote_address((struct sock_common *)sk, &remote); + nr = fill_remote_addresses_vec(msk, fullmesh, addrs); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local->addr, &remote, - local->flags, local->ifindex); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); spin_lock_bh(&msk->pm.lock); return; } @@ -484,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) mptcp_pm_create_subflow_or_signal_addr(msk); } +/* Fill all the local addresses into the array addrs[], + * and return the array size. + */ +static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *addrs) +{ + struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info local; + struct pm_nl_pernet *pernet; + unsigned int subflows_max; + int i = 0; + + pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + subflows_max = mptcp_pm_get_subflows_max(msk); + + rcu_read_lock(); + __mptcp_flush_join_list(msk); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) + continue; + + if (entry->addr.family != sk->sk_family) { +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if ((entry->addr.family == AF_INET && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || + (sk->sk_family == AF_INET && + !ipv6_addr_v4mapped(&entry->addr.addr6))) +#endif + continue; + } + + if (msk->pm.subflows < subflows_max) { + msk->pm.subflows++; + addrs[i++] = entry->addr; + } + } + rcu_read_unlock(); + + /* If the array is empty, fill in the single + * 'IPADDRANY' local address + */ + if (!i) { + memset(&local, 0, sizeof(local)); + local.family = msk->pm.remote.family; + + msk->pm.subflows++; + addrs[i++] = local; + } + + return i; +} + static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; struct mptcp_addr_info remote; - struct mptcp_addr_info local; unsigned int subflows_max; + int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); @@ -502,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) goto add_addr_echo; - msk->pm.add_addr_accepted++; - msk->pm.subflows++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - /* connect to the specified remote address, using whatever * local address the routing configuration will pick. */ remote = msk->pm.remote; if (!remote.port) remote.port = sk->sk_dport; - memset(&local, 0, sizeof(local)); - local.family = remote.family; + nr = fill_local_addresses_vec(msk, addrs); + + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local, &remote, 0, 0); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &addrs[i], &remote); spin_lock_bh(&msk->pm.lock); add_addr_echo: @@ -1105,6 +1208,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, + u8 *flags, int *ifindex) +{ + struct mptcp_pm_addr_entry *entry; + + *flags = 0; + *ifindex = 0; + + if (id) { + rcu_read_lock(); + entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id); + if (entry) { + *flags = entry->flags; + *ifindex = entry->ifindex; + } + rcu_read_unlock(); + } + + return 0; +} + static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_addr_info *addr) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 8bdd038def383..bc1bfd7ac9c14 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -577,8 +577,7 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote, - u8 flags, int ifindex); + const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, @@ -733,6 +732,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_addr_info *addr); +int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, + u8 *flags, int *ifindex); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1151926d335b2..8c43aa14897a9 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1355,8 +1355,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, } int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote, - u8 flags, int ifindex) + const struct mptcp_addr_info *remote) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; @@ -1367,6 +1366,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, struct sock *ssk; u32 remote_token; int addrlen; + int ifindex; + u8 flags; int err; if (!mptcp_is_fully_established(sk)) @@ -1390,6 +1391,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, local_id = err; } + mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id, + &flags, &ifindex); subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 52762eaa2d8e3..8c7117e2c337d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -344,17 +344,18 @@ do_transfer() let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 + pos=1 dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then - id=${dump[1]} sleep 1 while [ $counter -le $rm_nr_ns1 ] do + id=${dump[$pos]} ip netns exec ${listener_ns} ./pm_nl_ctl del $id sleep 1 let counter+=1 - let id+=1 + let pos+=5 done fi elif [ $rm_nr_ns1 -eq 8 ]; then @@ -366,6 +367,12 @@ do_transfer() fi fi + flags="subflow" + if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then + flags="${flags},fullmesh" + addr_nr_ns2=${addr_nr_ns2:9} + fi + if [ $addr_nr_ns2 -gt 0 ]; then let add_nr_ns2=addr_nr_ns2 counter=3 @@ -377,7 +384,7 @@ do_transfer() else addr="10.0.$counter.2" fi - ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow + ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags let counter+=1 let add_nr_ns2-=1 done @@ -386,17 +393,18 @@ do_transfer() let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 + pos=1 dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then - id=${dump[1]} sleep 1 while [ $counter -le $rm_nr_ns2 ] do + id=${dump[$pos]} ip netns exec ${connector_ns} ./pm_nl_ctl del $id sleep 1 let counter+=1 - let id+=1 + let pos+=5 done fi elif [ $rm_nr_ns2 -eq 8 ]; then @@ -1686,6 +1694,55 @@ deny_join_id0_tests() chk_join_nr "subflow and address allow join id0 2" 1 1 1 } +fullmesh_tests() +{ + # fullmesh 1 + # 2 fullmesh addrs in ns2, added before the connection, + # 1 non-fullmesh addr in ns1, added during the connection. + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 4 + ip netns exec $ns2 ./pm_nl_ctl limits 1 4 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh + run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + chk_join_nr "fullmesh test 2x1" 4 4 4 + chk_add_nr 1 1 + + # fullmesh 2 + # 1 non-fullmesh addr in ns1, added before the connection, + # 1 fullmesh addr in ns2, added during the connection. + reset + ip netns exec $ns1 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow + chk_join_nr "fullmesh test 1x1" 3 3 3 + chk_add_nr 1 1 + + # fullmesh 3 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection. + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 5 + ip netns exec $ns2 ./pm_nl_ctl limits 1 5 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + chk_join_nr "fullmesh test 1x2" 5 5 5 + chk_add_nr 1 1 + + # fullmesh 4 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection, + # limit max_subflows to 4. + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 4 + ip netns exec $ns2 ./pm_nl_ctl limits 1 4 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + chk_join_nr "fullmesh test 1x2, limited" 4 4 4 + chk_add_nr 1 1 +} + all_tests() { subflows_tests @@ -1701,6 +1758,7 @@ all_tests() syncookies_tests checksum_tests deny_join_id0_tests + fullmesh_tests } usage() @@ -1719,6 +1777,7 @@ usage() echo " -k syncookies_tests" echo " -S checksum_tests" echo " -d deny_join_id0_tests" + echo " -m fullmesh_tests" echo " -c capture pcap files" echo " -C enable data checksum" echo " -h help" @@ -1754,7 +1813,7 @@ if [ $do_all_tests -eq 1 ]; then exit $ret fi -while getopts 'fsltra64bpkdchCS' opt; do +while getopts 'fsltra64bpkdmchCS' opt; do case $opt in f) subflows_tests @@ -1795,6 +1854,9 @@ while getopts 'fsltra64bpkdchCS' opt; do d) deny_join_id0_tests ;; + m) + fullmesh_tests + ;; c) ;; C) diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 115decfdc1ef7..354784512748a 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -25,7 +25,7 @@ static void syntax(char *argv[]) { fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]); - fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); + fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n"); @@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; else error(1, errno, "unknown flag %s", argv[arg]); } + if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL && + flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + error(1, errno, "error flag fullmesh"); + } + rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; rta->rta_len = RTA_LENGTH(4); @@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + printf("fullmesh"); + flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH; + if (flags) + printf(","); + } + /* bump unknown flags, if any */ if (flags) printf("0x%x", flags); |