diff options
author | David S. Miller <davem@davemloft.net> | 2022-01-02 12:07:39 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-01-02 12:07:39 +0000 |
commit | ab6dd952b2d044d04a9906a997d16f1e6e4db48d (patch) | |
tree | 75e91032fac8767eeb60d9e822681587588662a6 /net/smc/smc_pnet.c | |
parent | e63a02348958cd7cc8c8401c94de57ad97b5d06c (diff) | |
parent | a838f5084828ac987af0903c523d6a2f7882705c (diff) |
Merge branch 'smc-RDMA-net-namespace'
Tony Lu says:
====================
RDMA device net namespace support for SMC
This patch set introduces net namespace support for linkgroups.
Path 1 is the main approach to implement net ns support.
Path 2 - 4 are the additional modifications to let us know the netns.
Also, I will submit changes of smc-tools to github later.
Currently, smc doesn't support net namespace isolation. The ibdevs
registered to smc are shared for all linkgroups and connections. When
running applications in different net namespaces, such as container
environment, applications should only use the ibdevs that belongs to the
same net namespace.
This adds a new field, net, in smc linkgroup struct. During first
contact, it checks and find the linkgroup has same net namespace, if
not, it is going to create and initialized the net field with first
link's ibdev net namespace. When finding the rdma devices, it also checks
the sk net device's and ibdev's net namespaces. After net namespace
destroyed, the net device and ibdev move to root net namespace,
linkgroups won't be matched, and wait for lgr free.
If rdma net namespace exclusive mode is not enabled, it behaves as
before.
Steps to enable and test net namespaces:
1. enable RDMA device net namespace exclusive support
rdma system set netns exclusive # default is shared
2. create new net namespace, move and initialize them
ip netns add test1
rdma dev set mlx5_1 netns test1
ip link set dev eth2 netns test1
ip netns exec test1 ip link set eth2 up
ip netns exec test1 ip addr add ${HOST_IP}/26 dev eth2
3. setup server and client, connect N <-> M
ip netns exec test1 smc_run sockperf server --tcp # server
ip netns exec test1 smc_run sockperf pp --tcp -i ${SERVER_IP} # client
4. netns isolated linkgroups (2 * 2 mesh) with their own linkgroups
- server
LG-ID LG-Role LG-Type VLAN #Conns PNET-ID
00000100 SERV SINGLE 0 0
00000200 SERV SINGLE 0 0
00000300 SERV SINGLE 0 0
00000400 SERV SINGLE 0 0
- client
LG-ID LG-Role LG-Type VLAN #Conns PNET-ID
00000100 CLNT SINGLE 0 0
00000200 CLNT SINGLE 0 0
00000300 CLNT SINGLE 0 0
00000400 CLNT SINGLE 0 0
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/smc/smc_pnet.c')
-rw-r--r-- | net/smc/smc_pnet.c | 21 |
1 files changed, 16 insertions, 5 deletions
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index e171cc6483f84..db9825c01e0af 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -977,14 +977,16 @@ static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i, /* find a roce device for the given pnetid */ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, struct smc_init_info *ini, - struct smc_ib_device *known_dev) + struct smc_ib_device *known_dev, + struct net *net) { struct smc_ib_device *ibdev; int i; mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { - if (ibdev == known_dev) + if (ibdev == known_dev || + !rdma_dev_access_netns(ibdev->ibdev, net)) continue; for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) @@ -1001,12 +1003,14 @@ out: mutex_unlock(&smc_ib_devices.mutex); } -/* find alternate roce device with same pnet_id and vlan_id */ +/* find alternate roce device with same pnet_id, vlan_id and net namespace */ void smc_pnet_find_alt_roce(struct smc_link_group *lgr, struct smc_init_info *ini, struct smc_ib_device *known_dev) { - _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); + struct net *net = lgr->net; + + _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev, net); } /* if handshake network device belongs to a roce device, return its @@ -1015,6 +1019,7 @@ void smc_pnet_find_alt_roce(struct smc_link_group *lgr, static void smc_pnet_find_rdma_dev(struct net_device *netdev, struct smc_init_info *ini) { + struct net *net = dev_net(netdev); struct smc_ib_device *ibdev; mutex_lock(&smc_ib_devices.mutex); @@ -1022,6 +1027,10 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, struct net_device *ndev; int i; + /* check rdma net namespace */ + if (!rdma_dev_access_netns(ibdev->ibdev, net)) + continue; + for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; @@ -1052,15 +1061,17 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct net *net; ndev = pnet_find_base_ndev(ndev); + net = dev_net(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid) && smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { smc_pnet_find_rdma_dev(ndev, ini); return; /* pnetid could not be determined */ } - _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); + _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net); } static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, |