diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-01 14:54:03 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-01 14:54:03 -0700 |
commit | e04360a2ea01bf42aa639b65aad81f502e896c7f (patch) | |
tree | 59a4f4999a0da1204744aee8888b7001f7cccabd /drivers/infiniband/core/cm.c | |
parent | 514798d36572fb8eba6ccff3de10c9615063a7f5 (diff) | |
parent | 3d8287544223a3d2f37981c1f9ffd94d0b5e9ffc (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This contains a replacement driver for Intel iWarp hardware. This new
driver supports the old ethernet hardware and also newer chips that
can do ROCE.
Other than that, this contains the typical mix of patches:
- Driver updates and cleanups for bnxt_re, cxgb4, mlx4, and mlx5
- Many static checker driven code clean ups, including a wide
refcount_t conversion
- Several series for the hns driver, more HIP09 HW capabilities,
migration to new HW register manipulators, and code cleanups
- Minor fixes and improvements in srp, rts, and cm
- Improvements throughout for sysfs related code to use
DEVICE_ATTR_*, make the ib_port sysfs first-class, and overall use
sysfs APIs properly
- Intel's new irdma driver replacing i40iw
- rxe general clean ups and Memory Window support"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (211 commits)
RDMA/core: Always release restrack object
RDMA/mlx5: Don't access NULL-cleared mpi pointer
RDMA/irdma: Fix potential overflow expression in irdma_prm_get_pbles
RDMA/irdma: Check contents of user-space irdma_mem_reg_req object
RDMA/rxe: Missing unlock on error in get_srq_wqe()
RDMA/cma: Fix rdma_resolve_route() memory leak
RDMA/core/sa_query: Remove unused argument
RDMA/cma: Fix incorrect Packet Lifetime calculation
RDMA/cma: Protect RMW with qp_mutex
RDMA/cma: Remove unnecessary INIT->INIT transition
RDMA/hns: Add window selection field of congestion control
RDMA/hfi1: Remove use of kmap()
RDMA/irdma: Remove use of kmap()
RDMA/bnxt_re: Fix uninitialized struct bit field rsvd1
IB/isert: Align target max I/O size to initiator size
RDMA/hns: Fix incorrect vlan enable bit in QPC
MAINTAINERS: Update Broadcom RDMA maintainers
RDMA/irdma: Use the queried port attributes
RDMA/rxe: Fix redundant skb_put_zero
RDMA/rxe: Fix extra copy in prepare_ack_packet
...
Diffstat (limited to 'drivers/infiniband/core/cm.c')
-rw-r--r-- | drivers/infiniband/core/cm.c | 863 |
1 files changed, 431 insertions, 432 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0ead0d223154..c903b74f46a4 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -25,6 +25,7 @@ #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> +#include <rdma/ib_sysfs.h> #include "cm_msgs.h" #include "core_priv.h" #include "cm_trace.h" @@ -121,8 +122,6 @@ static struct ib_cm { __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; - /* Sync on cm change port state */ - spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ @@ -150,66 +149,23 @@ enum { CM_COUNTER_GROUPS }; -static char const counter_group_names[CM_COUNTER_GROUPS] - [sizeof("cm_rx_duplicates")] = { - "cm_tx_msgs", "cm_tx_retries", - "cm_rx_msgs", "cm_rx_duplicates" -}; - -struct cm_counter_group { - struct kobject obj; - atomic_long_t counter[CM_ATTR_COUNT]; -}; - struct cm_counter_attribute { - struct attribute attr; - int index; -}; - -#define CM_COUNTER_ATTR(_name, _index) \ -struct cm_counter_attribute cm_##_name##_counter_attr = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .index = _index \ -} - -static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); -static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); -static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); -static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); -static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); -static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); -static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); -static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); -static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); -static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); -static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); - -static struct attribute *cm_counter_default_attrs[] = { - &cm_req_counter_attr.attr, - &cm_mra_counter_attr.attr, - &cm_rej_counter_attr.attr, - &cm_rep_counter_attr.attr, - &cm_rtu_counter_attr.attr, - &cm_dreq_counter_attr.attr, - &cm_drep_counter_attr.attr, - &cm_sidr_req_counter_attr.attr, - &cm_sidr_rep_counter_attr.attr, - &cm_lap_counter_attr.attr, - &cm_apr_counter_attr.attr, - NULL + struct ib_port_attribute attr; + unsigned short group; + unsigned short index; }; struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; u32 port_num; - struct list_head cm_priv_prim_list; - struct list_head cm_priv_altr_list; - struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; + atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT]; }; struct cm_device { + struct kref kref; struct list_head list; + spinlock_t mad_agent_lock; struct ib_device *ib_device; u8 ack_delay; int going_down; @@ -218,7 +174,6 @@ struct cm_device { struct cm_av { struct cm_port *port; - union ib_gid dgid; struct rdma_ah_attr ah_attr; u16 pkey_index; u8 timeout; @@ -251,6 +206,7 @@ struct cm_id_private { struct rb_node service_node; struct rb_node sidr_id_node; + u32 sidr_slid; spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; refcount_t refcount; @@ -285,18 +241,28 @@ struct cm_id_private { u8 service_timeout; u8 target_ack_delay; - struct list_head prim_list; - struct list_head altr_list; - /* Indicates that the send port mad is registered and av is set */ - int prim_send_port_not_ready; - int altr_send_port_not_ready; - struct list_head work_list; atomic_t work_count; struct rdma_ucm_ece ece; }; +static void cm_dev_release(struct kref *kref) +{ + struct cm_device *cm_dev = container_of(kref, struct cm_device, kref); + u32 i; + + rdma_for_each_port(cm_dev->ib_device, i) + kfree(cm_dev->port[i - 1]); + + kfree(cm_dev); +} + +static void cm_device_put(struct cm_device *cm_dev) +{ + kref_put(&cm_dev->kref, cm_dev_release); +} + static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) @@ -305,52 +271,37 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv) complete(&cm_id_priv->comp); } -static int cm_alloc_msg(struct cm_id_private *cm_id_priv, - struct ib_mad_send_buf **msg) +static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) { struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; - struct cm_av *av; - unsigned long flags, flags2; - int ret = 0; - /* don't let the port to be released till the agent is down */ - spin_lock_irqsave(&cm.state_lock, flags2); - spin_lock_irqsave(&cm.lock, flags); - if (!cm_id_priv->prim_send_port_not_ready) - av = &cm_id_priv->av; - else if (!cm_id_priv->altr_send_port_not_ready && - (cm_id_priv->alt_av.port)) - av = &cm_id_priv->alt_av; - else { - pr_info("%s: not valid CM id\n", __func__); - ret = -ENODEV; - spin_unlock_irqrestore(&cm.lock, flags); - goto out; - } - spin_unlock_irqrestore(&cm.lock, flags); - /* Make sure the port haven't released the mad yet */ + lockdep_assert_held(&cm_id_priv->lock); + + if (!cm_id_priv->av.port) + return ERR_PTR(-EINVAL); + + spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); mad_agent = cm_id_priv->av.port->mad_agent; if (!mad_agent) { - pr_info("%s: not a valid MAD agent\n", __func__); - ret = -ENODEV; + m = ERR_PTR(-EINVAL); goto out; } - ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0); + + ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0); if (IS_ERR(ah)) { - ret = PTR_ERR(ah); + m = ERR_CAST(ah); goto out; } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, - av->pkey_index, + cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { rdma_destroy_ah(ah, 0); - ret = PTR_ERR(m); goto out; } @@ -360,11 +311,49 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, refcount_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; - *msg = m; out: - spin_unlock_irqrestore(&cm.state_lock, flags2); - return ret; + spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + return m; +} + +static void cm_free_msg(struct ib_mad_send_buf *msg) +{ + struct cm_id_private *cm_id_priv = msg->context[0]; + + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + cm_deref_id(cm_id_priv); + ib_free_send_mad(msg); +} + +static struct ib_mad_send_buf * +cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) +{ + struct ib_mad_send_buf *msg; + + lockdep_assert_held(&cm_id_priv->lock); + + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return msg; + cm_id_priv->msg = msg; + return msg; +} + +static void cm_free_priv_msg(struct ib_mad_send_buf *msg) +{ + struct cm_id_private *cm_id_priv = msg->context[0]; + + lockdep_assert_held(&cm_id_priv->lock); + + if (!WARN_ON(cm_id_priv->msg != msg)) + cm_id_priv->msg = NULL; + + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + cm_deref_id(cm_id_priv); + ib_free_send_mad(msg); } static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port, @@ -391,15 +380,6 @@ static int cm_create_response_msg_ah(struct cm_port *port, return 0; } -static void cm_free_msg(struct ib_mad_send_buf *msg) -{ - if (msg->ah) - rdma_destroy_ah(msg->ah, 0); - if (msg->context[0]) - cm_deref_id(msg->context[0]); - ib_free_send_mad(msg); -} - static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, struct ib_mad_send_buf **msg) @@ -413,7 +393,7 @@ static int cm_alloc_response_msg(struct cm_port *port, ret = cm_create_response_msg_ah(port, mad_recv_wc, m); if (ret) { - cm_free_msg(m); + ib_free_send_mad(m); return ret; } @@ -421,6 +401,13 @@ static int cm_alloc_response_msg(struct cm_port *port, return 0; } +static void cm_free_response_msg(struct ib_mad_send_buf *msg) +{ + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + ib_free_send_mad(msg); +} + static void *cm_copy_private_data(const void *private_data, u8 private_data_len) { void *data; @@ -445,57 +432,38 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, cm_id_priv->private_data_len = private_data_len; } -static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, - struct ib_grh *grh, struct cm_av *av) +static void cm_set_av_port(struct cm_av *av, struct cm_port *port) { - struct rdma_ah_attr new_ah_attr; - int ret; + struct cm_port *old_port = av->port; - av->port = port; - av->pkey_index = wc->pkey_index; + if (old_port == port) + return; - /* - * av->ah_attr might be initialized based on past wc during incoming - * connect request or while sending out connect request. So initialize - * a new ah_attr on stack. If initialization fails, old ah_attr is - * used for sending any responses. If initialization is successful, - * than new ah_attr is used by overwriting old one. - */ - ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device, - port->port_num, wc, - grh, &new_ah_attr); - if (ret) - return ret; + av->port = port; + if (old_port) + cm_device_put(old_port->cm_dev); + if (port) + kref_get(&port->cm_dev->kref); +} - rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); - return 0; +static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, + struct rdma_ah_attr *ah_attr, struct cm_av *av) +{ + cm_set_av_port(av, port); + av->pkey_index = wc->pkey_index; + rdma_move_ah_attr(&av->ah_attr, ah_attr); } static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, struct ib_grh *grh, struct cm_av *av) { - av->port = port; + cm_set_av_port(av, port); av->pkey_index = wc->pkey_index; return ib_init_ah_attr_from_wc(port->cm_dev->ib_device, port->port_num, wc, grh, &av->ah_attr); } -static void add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, - struct cm_av *av, struct cm_port *port) -{ - unsigned long flags; - - spin_lock_irqsave(&cm.lock, flags); - if (&cm_id_priv->av == av) - list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); - else if (&cm_id_priv->alt_av == av) - list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); - else - WARN_ON(true); - spin_unlock_irqrestore(&cm.lock, flags); -} - static struct cm_port * get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) { @@ -539,8 +507,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) static int cm_init_av_by_path(struct sa_path_rec *path, const struct ib_gid_attr *sgid_attr, - struct cm_av *av, - struct cm_id_private *cm_id_priv) + struct cm_av *av) { struct rdma_ah_attr new_ah_attr; struct cm_device *cm_dev; @@ -557,7 +524,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, if (ret) return ret; - av->port = port; + cm_set_av_port(av, port); /* * av->ah_attr might be initialized based on wc or during @@ -574,11 +541,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, return ret; av->timeout = path->packet_life_time + 1; - add_cm_id_to_port_list(cm_id_priv, av, port); rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } +/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */ +static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src) +{ + cm_set_av_port(dest, src->port); + cm_set_av_port(src, NULL); + dest->pkey_index = src->pkey_index; + rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr); + dest->timeout = src->timeout; +} + +static void cm_destroy_av(struct cm_av *av) +{ + rdma_destroy_ah_attr(&av->ah_attr); + cm_set_av_port(av, NULL); +} + static u32 cm_local_id(__be32 local_id) { return (__force u32) (local_id ^ cm.random_id_operand); @@ -803,7 +785,6 @@ cm_insert_remote_sidr(struct cm_id_private *cm_id_priv) struct rb_node **link = &cm.remote_sidr_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; - union ib_gid *port_gid = &cm_id_priv->av.dgid; __be32 remote_id = cm_id_priv->id.remote_id; while (*link) { @@ -815,12 +796,9 @@ cm_insert_remote_sidr(struct cm_id_private *cm_id_priv) else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_right; else { - int cmp; - cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, - sizeof *port_gid); - if (cmp < 0) + if (cur_cm_id_priv->sidr_slid < cm_id_priv->sidr_slid) link = &(*link)->rb_left; - else if (cmp > 0) + else if (cur_cm_id_priv->sidr_slid > cm_id_priv->sidr_slid) link = &(*link)->rb_right; else return cur_cm_id_priv; @@ -854,8 +832,6 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device, spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); - INIT_LIST_HEAD(&cm_id_priv->prim_list); - INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); refcount_set(&cm_id_priv->refcount, 1); @@ -1082,7 +1058,7 @@ retest: break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_SIDR_REQ_RCVD: cm_send_sidr_rep_locked(cm_id_priv, @@ -1093,7 +1069,7 @@ retest: break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof(cm_id_priv->id.device->node_guid), @@ -1111,7 +1087,7 @@ retest: break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); goto retest; @@ -1129,7 +1105,7 @@ retest: cm_send_dreq_locked(cm_id_priv, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_enter_timewait(cm_id_priv); goto retest; case IB_CM_DREQ_RCVD: @@ -1156,12 +1132,7 @@ retest: kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } - if (!list_empty(&cm_id_priv->altr_list) && - (!cm_id_priv->altr_send_port_not_ready)) - list_del(&cm_id_priv->altr_list); - if (!list_empty(&cm_id_priv->prim_list) && - (!cm_id_priv->prim_send_port_not_ready)) - list_del(&cm_id_priv->prim_list); + WARN_ON(cm_id_priv->listen_sharecount); WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node)); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) @@ -1175,8 +1146,8 @@ retest: while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); - rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); - rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); + cm_destroy_av(&cm_id_priv->av); + cm_destroy_av(&cm_id_priv->alt_av); kfree(cm_id_priv->private_data); kfree_rcu(cm_id_priv, rcu); } @@ -1308,10 +1279,18 @@ EXPORT_SYMBOL(ib_cm_insert_listen); static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) { - u64 hi_tid, low_tid; + u64 hi_tid = 0, low_tid; - hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64)cm_id_priv->id.local_id; + lockdep_assert_held(&cm_id_priv->lock); + + low_tid = (u64)cm_id_priv->id.local_id; + if (!cm_id_priv->av.port) + return cpu_to_be64(low_tid); + + spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + if (cm_id_priv->av.port->mad_agent) + hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32; + spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return cpu_to_be64(hi_tid | low_tid); } @@ -1500,7 +1479,9 @@ static int cm_validate_req_param(struct ib_cm_req_param *param) int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { + struct cm_av av = {}, alt_av = {}; struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -1514,8 +1495,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = -EINVAL; - goto out; + return -EINVAL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1524,19 +1504,20 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; - goto out; + return ret; } ret = cm_init_av_by_path(param->primary_path, - param->ppath_sgid_attr, &cm_id_priv->av, - cm_id_priv); + param->ppath_sgid_attr, &av); if (ret) - goto out; + return ret; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av, cm_id_priv); - if (ret) - goto out; + &alt_av); + if (ret) { + cm_destroy_av(&av); + return ret; + } } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); @@ -1552,33 +1533,40 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; - ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); - if (ret) - goto out; + spin_lock_irqsave(&cm_id_priv->lock, flags); + + cm_move_av_from_path(&cm_id_priv->av, &av); + if (param->alternate_path) + cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); + + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out_unlock; + } - req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; + req_msg = (struct cm_req_msg *)msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; - cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT; cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); trace_icm_send_req(&cm_id_priv->id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - goto error2; - } + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto out_free; BUG_ON(cm_id->state != IB_CM_IDLE); cm_id->state = IB_CM_REQ_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; - -error2: cm_free_msg(cm_id_priv->msg); -out: return ret; +out_free: + cm_free_priv_msg(msg); +out_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return ret; } EXPORT_SYMBOL(ib_send_cm_req); @@ -1618,7 +1606,7 @@ static int cm_issue_rej(struct cm_port *port, IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_msg(msg); + cm_free_response_msg(msg); return ret; } @@ -1757,7 +1745,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work) ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey); if (ret) { - dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n", + dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %u, pkey index %u). %d\n", port_num, pkey_index, ret); return 0; } @@ -1934,8 +1922,8 @@ static void cm_dup_req_handler(struct cm_work *work, struct ib_mad_send_buf *msg = NULL; int ret; - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_REQ_COUNTER]); + atomic_long_inc( + &work->port->counters[CM_RECV_DUPLICATES][CM_REQ_COUNTER]); /* Quick state check to discard duplicate REQs. */ spin_lock_irq(&cm_id_priv->lock); @@ -1974,7 +1962,7 @@ static void cm_dup_req_handler(struct cm_work *work, return; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); +free: cm_free_response_msg(msg); } static struct cm_id_private *cm_match_req(struct cm_work *work, @@ -2163,8 +2151,10 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av, - cm_id_priv); + + /* This destroy call is needed to pair with cm_init_av_for_response */ + cm_destroy_av(&cm_id_priv->av); + ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av); if (ret) { int err; @@ -2183,7 +2173,7 @@ static int cm_req_handler(struct cm_work *work) } if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], NULL, - &cm_id_priv->alt_av, cm_id_priv); + &cm_id_priv->alt_av); if (ret) { ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_ALT_GID, @@ -2283,9 +2273,11 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, goto out; } - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); goto out; + } rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); @@ -2294,14 +2286,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, trace_icm_send_rep(cm_id); ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } + if (ret) + goto out_free; cm_id->state = IB_CM_REP_SENT; - cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); @@ -2309,8 +2297,13 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, "IBTA declares QPN to be 24 bits, but it is 0x%X\n", param->qp_num); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +out_free: + cm_free_priv_msg(msg); +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rep); @@ -2357,9 +2350,11 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, goto error; } - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); goto error; + } cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); @@ -2426,8 +2421,8 @@ static void cm_dup_rep_handler(struct cm_work *work) if (!cm_id_priv) return; - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_REP_COUNTER]); + atomic_long_inc( + &work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) goto deref; @@ -2453,7 +2448,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto deref; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); +free: cm_free_response_msg(msg); deref: cm_deref_id(cm_id_priv); } @@ -2553,7 +2548,7 @@ static int cm_rep_handler(struct cm_work *work) cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; @@ -2577,7 +2572,7 @@ static int cm_establish_handler(struct cm_work *work) goto out; } - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2604,13 +2599,13 @@ static int cm_rtu_handler(struct cm_work *work) if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_RTU_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_RTU_COUNTER]); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2655,12 +2650,12 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) { + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { cm_enter_timewait(cm_id_priv); - return ret; + return PTR_ERR(msg); } cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, @@ -2672,12 +2667,11 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); - cm_free_msg(msg); + cm_free_priv_msg(msg); return ret; } cm_id_priv->id.state = IB_CM_DREQ_SENT; - cm_id_priv->msg = msg; return 0; } @@ -2732,9 +2726,9 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, cm_set_private_data(cm_id_priv, private_data, private_data_len); cm_enter_timewait(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); @@ -2794,7 +2788,7 @@ static int cm_issue_drep(struct cm_port *port, IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_msg(msg); + cm_free_response_msg(msg); return ret; } @@ -2810,8 +2804,8 @@ static int cm_dreq_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)), cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg))); if (!cm_id_priv) { - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_DREQ_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); trace_icm_no_priv_err( IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg), @@ -2830,18 +2824,18 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_DREQ_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_DREQ_COUNTER]); msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); if (IS_ERR(msg)) goto unlock; @@ -2853,11 +2847,11 @@ static int cm_dreq_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); + cm_free_response_msg(msg); goto deref; case IB_CM_DREQ_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_DREQ_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_DREQ_COUNTER]); goto unlock; default: trace_icm_dreq_unknown_err(&cm_id_priv->id); @@ -2896,7 +2890,7 @@ static int cm_drep_handler(struct cm_work *work) } cm_enter_timewait(cm_id_priv); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2927,9 +2921,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: cm_reset_to_idle(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len, state); @@ -2937,9 +2931,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_enter_timewait(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len, state); @@ -3032,7 +3026,7 @@ static int cm_rej_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); fallthrough; case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -3042,7 +3036,7 @@ static int cm_rej_handler(struct cm_work *work) cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); fallthrough; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -3052,8 +3046,7 @@ static int cm_rej_handler(struct cm_work *work) if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_enter_timewait(cm_id_priv); break; } @@ -3117,13 +3110,15 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, default: trace_icm_send_mra_unknown_err(&cm_id_priv->id); ret = -EINVAL; - goto error1; + goto error_unlock; } if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error1; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto error_unlock; + } cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, msg_response, service_timeout, @@ -3131,7 +3126,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, trace_icm_send_mra(cm_id); ret = ib_post_send_mad(msg, NULL); if (ret) - goto error2; + goto error_free_msg; } cm_id->state = cm_state; @@ -3141,13 +3136,11 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; -error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - return ret; - -error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); +error_free_msg: cm_free_msg(msg); +error_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); return ret; } EXPORT_SYMBOL(ib_send_cm_mra); @@ -3192,16 +3185,14 @@ static int cm_mra_handler(struct cm_work *work) case IB_CM_REQ_SENT: if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_REQ || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + ib_modify_mad(cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_REP || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + ib_modify_mad(cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; @@ -3209,20 +3200,19 @@ static int cm_mra_handler(struct cm_work *work) if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) { + ib_modify_mad(cm_id_priv->msg, timeout)) { if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - atomic_long_inc(&work->port-> - counter_group[CM_RECV_DUPLICATES]. - counter[CM_MRA_COUNTER]); + atomic_long_inc( + &work->port->counters[CM_RECV_DUPLICATES] + [CM_MRA_COUNTER]); goto out; } cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; case IB_CM_MRA_REQ_RCVD: case IB_CM_MRA_REP_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_MRA_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_MRA_COUNTER]); fallthrough; default: trace_icm_mra_unknown_err(&cm_id_priv->id); @@ -3291,6 +3281,8 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; + struct rdma_ah_attr ah_attr; + struct cm_av alt_av = {}; int ret; /* Currently Alternate path messages are not supported for @@ -3319,7 +3311,25 @@ static int cm_lap_handler(struct cm_work *work) work->cm_event.private_data = IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg); + ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device, + work->port->port_num, + work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &ah_attr); + if (ret) + goto deref; + + ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av); + if (ret) { + rdma_destroy_ah_attr(&ah_attr); + return -EINVAL; + } + spin_lock_irq(&cm_id_priv->lock); + cm_init_av_for_lap(work->port, work->mad_recv_wc->wc, + &ah_attr, &cm_id_priv->av); + cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; @@ -3328,8 +3338,8 @@ static int cm_lap_handler(struct cm_work *work) case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_LAP_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_LAP_COUNTER]); msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); if (IS_ERR(msg)) goto unlock; @@ -3343,27 +3353,16 @@ static int cm_lap_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); + cm_free_response_msg(msg); goto deref; case IB_CM_LAP_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_LAP_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_LAP_COUNTER]); goto unlock; default: goto unlock; } - ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc, - work->mad_recv_wc->recv_buf.grh, - &cm_id_priv->av); - if (ret) - goto unlock; - - ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av, cm_id_priv); - if (ret) - goto unlock; - cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; cm_queue_work_unlock(cm_id_priv, work); @@ -3410,8 +3409,7 @@ static int cm_apr_handler(struct cm_work *work) goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - cm_id_priv->msg = NULL; + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -3471,6 +3469,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct cm_av av = {}; unsigned long flags; int ret; @@ -3479,42 +3478,43 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, param->sgid_attr, - &cm_id_priv->av, - cm_id_priv); + ret = cm_init_av_by_path(param->path, param->sgid_attr, &av); if (ret) - goto out; + return ret; + spin_lock_irqsave(&cm_id_priv->lock, flags); + cm_move_av_from_path(&cm_id_priv->av, &av); cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, - param); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; - - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state == IB_CM_IDLE) { - trace_icm_send_sidr_req(&cm_id_priv->id); - ret = ib_post_send_mad(msg, NULL); - } else { + if (cm_id->state != IB_CM_IDLE) { ret = -EINVAL; + goto out_unlock; } - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - goto out; + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out_unlock; } + + cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv, + param); + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT; + + trace_icm_send_sidr_req(&cm_id_priv->id); + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto out_free; cm_id->state = IB_CM_SIDR_REQ_SENT; - cm_id_priv->msg = msg; spin_unlock_irqrestore(&cm_id_priv->lock, flags); -out: + return 0; +out_free: + cm_free_priv_msg(msg); +out_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_req); @@ -3564,8 +3564,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cm_id_priv->tid = sidr_req_msg->hdr.tid; wc = work->mad_recv_wc->wc; - cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); - cm_id_priv->av.dgid.global.interface_id = 0; + cm_id_priv->sidr_slid = wc->slid; ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); @@ -3576,8 +3575,8 @@ static int cm_sidr_req_handler(struct cm_work *work) listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (listen_cm_id_priv) { spin_unlock_irq(&cm.lock); - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. - counter[CM_SIDR_REQ_COUNTER]); + atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] + [CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; @@ -3661,9 +3660,9 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD) return -EINVAL; - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); @@ -3737,7 +3736,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work, cm_id_priv); @@ -3748,22 +3747,26 @@ out: return -EINVAL; } -static void cm_process_send_error(struct ib_mad_send_buf *msg, +static void cm_process_send_error(struct cm_id_private *cm_id_priv, + struct ib_mad_send_buf *msg, + enum ib_cm_state state, enum ib_wc_status wc_status) { - struct cm_id_private *cm_id_priv; - struct ib_cm_event cm_event; - enum ib_cm_state state; + struct ib_cm_event cm_event = {}; int ret; - memset(&cm_event, 0, sizeof cm_event); - cm_id_priv = msg->context[0]; - /* Discard old sends or ones without a response. */ spin_lock_irq(&cm_id_priv->lock); - state = (enum ib_cm_state) (unsigned long) msg->context[1]; - if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) - goto discard; + if (msg != cm_id_priv->msg) { + spin_unlock_irq(&cm_id_priv->lock); + cm_free_msg(msg); + return; + } + cm_free_priv_msg(msg); + + if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS || + wc_status == IB_WC_WR_FLUSH_ERR) + goto out_unlock; trace_icm_mad_send_err(state, wc_status); switch (state) { @@ -3786,26 +3789,27 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, cm_event.event = IB_CM_SIDR_REQ_ERROR; break; default: - goto discard; + goto out_unlock; } spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); - cm_free_msg(msg); if (ret) ib_destroy_cm_id(&cm_id_priv->id); return; -discard: +out_unlock: spin_unlock_irq(&cm_id_priv->lock); - cm_free_msg(msg); } static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct cm_id_private *cm_id_priv = msg->context[0]; + enum ib_cm_state state = + (enum ib_cm_state)(unsigned long)msg->context[1]; struct cm_port *port; u16 attr_index; @@ -3818,28 +3822,19 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, * set to a cm_id), and is not a REJ, then it is a send that was * manually retried. */ - if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) + if (!cm_id_priv && (attr_index != CM_REJ_COUNTER)) msg->retries = 1; - atomic_long_add(1 + msg->retries, - &port->counter_group[CM_XMIT].counter[attr_index]); + atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]); if (msg->retries) atomic_long_add(msg->retries, - &port->counter_group[CM_XMIT_RETRIES]. - counter[attr_index]); + &port->counters[CM_XMIT_RETRIES][attr_index]); - switch (mad_send_wc->status) { - case IB_WC_SUCCESS: - case IB_WC_WR_FLUSH_ERR: - cm_free_msg(msg); - break; - default: - if (msg->context[0] && msg->context[1]) - cm_process_send_error(msg, mad_send_wc->status); - else - cm_free_msg(msg); - break; - } + if (cm_id_priv) + cm_process_send_error(cm_id_priv, msg, state, + mad_send_wc->status); + else + cm_free_response_msg(msg); } static void cm_work_handler(struct work_struct *_work) @@ -3963,9 +3958,7 @@ out: static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; - struct cm_av tmp_av; unsigned long flags; - int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -3974,14 +3967,7 @@ static int cm_migrate(struct ib_cm_id *cm_id) (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; - /* Swap address vector */ - tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; - cm_id_priv->alt_av = tmp_av; - /* Swap port send ready state */ - tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; - cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; - cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -4063,8 +4049,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, } attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); - atomic_long_inc(&port->counter_group[CM_RECV]. - counter[attr_id - CM_ATTR_ID_OFFSET]); + atomic_long_inc(&port->counters[CM_RECV][attr_id - CM_ATTR_ID_OFFSET]); work = kmalloc(struct_size(work, path, paths), GFP_KERNEL); if (!work) { @@ -4116,7 +4101,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; - qp_attr->port_num = cm_id_priv->av.port->port_num; + if (cm_id_priv->av.port) + qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; break; default: @@ -4158,7 +4144,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, cm_id_priv->responder_resources; qp_attr->min_rnr_timer = 0; } - if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) { + if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) && + cm_id_priv->alt_av.port) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; @@ -4219,7 +4206,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, } } else { *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; - qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + if (cm_id_priv->alt_av.port) + qp_attr->alt_port_num = + cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; @@ -4262,59 +4251,74 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); -static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, - char *buf) +static ssize_t cm_show_counter(struct ib_device *ibdev, u32 port_num, + struct ib_port_attribute *attr, char *buf) { - struct cm_counter_group *group; - struct cm_counter_attribute *cm_attr; + struct cm_counter_attribute *cm_attr = + container_of(attr, struct cm_counter_attribute, attr); + struct cm_device *cm_dev = ib_get_client_data(ibdev, &cm_client); - group = container_of(obj, struct cm_counter_group, obj); - cm_attr = container_of(attr, struct cm_counter_attribute, attr); + if (WARN_ON(!cm_dev)) + return -EINVAL; - return sysfs_emit(buf, "%ld\n", - atomic_long_read(&group->counter[cm_attr->index])); + return sysfs_emit( + buf, "%ld\n", + atomic_long_read( + &cm_dev->port[port_num - 1] + ->counters[cm_attr->group][cm_attr->index])); } -static const struct sysfs_ops cm_counter_ops = { - .show = cm_show_counter -}; - -static struct kobj_type cm_counter_obj_type = { - .sysfs_ops = &cm_counter_ops, - .default_attrs = cm_counter_default_attrs -}; - -static int cm_create_port_fs(struct cm_port *port) -{ - int i, ret; - - for (i = 0; i < CM_COUNTER_GROUPS; i++) { - ret = ib_port_register_module_stat(port->cm_dev->ib_device, - port->port_num, - &port->counter_group[i].obj, - &cm_counter_obj_type, - counter_group_names[i]); - if (ret) - goto error; +#define CM_COUNTER_ATTR(_name, _group, _index) \ + { \ + .attr = __ATTR(_name, 0444, cm_show_counter, NULL), \ + .group = _group, .index = _index \ } - return 0; - -error: - while (i--) - ib_port_unregister_module_stat(&port->counter_group[i].obj); - return ret; - -} - -static void cm_remove_port_fs(struct cm_port *port) -{ - int i; - - for (i = 0; i < CM_COUNTER_GROUPS; i++) - ib_port_unregister_module_stat(&port->counter_group[i].obj); +#define CM_COUNTER_GROUP(_group, _name) \ + static struct cm_counter_attribute cm_counter_attr_##_group[] = { \ + CM_COUNTER_ATTR(req, _group, CM_REQ_COUNTER), \ + CM_COUNTER_ATTR(mra, _group, CM_MRA_COUNTER), \ + CM_COUNTER_ATTR(rej, _group, CM_REJ_COUNTER), \ + CM_COUNTER_ATTR(rep, _group, CM_REP_COUNTER), \ + CM_COUNTER_ATTR(rtu, _group, CM_RTU_COUNTER), \ + CM_COUNTER_ATTR(dreq, _group, CM_DREQ_COUNTER), \ + CM_COUNTER_ATTR(drep, _group, CM_DREP_COUNTER), \ + CM_COUNTER_ATTR(sidr_req, _group, CM_SIDR_REQ_COUNTER), \ + CM_COUNTER_ATTR(sidr_rep, _group, CM_SIDR_REP_COUNTER), \ + CM_COUNTER_ATTR(lap, _group, CM_LAP_COUNTER), \ + CM_COUNTER_ATTR(apr, _group, CM_APR_COUNTER), \ + }; \ + static struct attribute *cm_counter_attrs_##_group[] = { \ + &cm_counter_attr_##_group[0].attr.attr, \ + &cm_counter_attr_##_group[1].attr.attr, \ + &cm_counter_attr_##_group[2].attr.attr, \ + &cm_counter_attr_##_group[3].attr.attr, \ + &cm_counter_attr_##_group[4].attr.attr, \ + &cm_counter_attr_##_group[5].attr.attr, \ + &cm_counter_attr_##_group[6].attr.attr, \ + &cm_counter_attr_##_group[7].attr.attr, \ + &cm_counter_attr_##_group[8].attr.attr, \ + &cm_counter_attr_##_group[9].attr.attr, \ + &cm_counter_attr_##_group[10].attr.attr, \ + NULL, \ + }; \ + static const struct attribute_group cm_counter_group_##_group = { \ + .name = _name, \ + .attrs = cm_counter_attrs_##_group, \ + }; -} +CM_COUNTER_GROUP(CM_XMIT, "cm_tx_msgs") +CM_COUNTER_GROUP(CM_XMIT_RETRIES, "cm_tx_retries") +CM_COUNTER_GROUP(CM_RECV, "cm_rx_msgs") +CM_COUNTER_GROUP(CM_RECV_DUPLICATES, "cm_rx_duplicates") + +static const struct attribute_group *cm_counter_groups[] = { + &cm_counter_group_CM_XMIT, + &cm_counter_group_CM_XMIT_RETRIES, + &cm_counter_group_CM_RECV, + &cm_counter_group_CM_RECV_DUPLICATES, + NULL, +}; static int cm_add_one(struct ib_device *ib_device) { @@ -4337,10 +4341,14 @@ static int cm_add_one(struct ib_device *ib_device) if (!cm_dev) return -ENOMEM; + kref_init(&cm_dev->kref); + spin_lock_init(&cm_dev->mad_agent_lock); cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; + ib_set_client_data(ib_device, &cm_client, cm_dev); + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) @@ -4356,10 +4364,8 @@ static int cm_add_one(struct ib_device *ib_device) port->cm_dev = cm_dev; port->port_num = i; - INIT_LIST_HEAD(&port->cm_priv_prim_list); - INIT_LIST_HEAD(&port->cm_priv_altr_list); - - ret = cm_create_port_fs(port); + ret = ib_port_register_client_groups(ib_device, i, + cm_counter_groups); if (ret) goto error1; @@ -4388,8 +4394,6 @@ static int cm_add_one(struct ib_device *ib_device) goto free; } - ib_set_client_data(ib_device, &cm_client, cm_dev); - write_lock_irqsave(&cm.device_lock, flags); list_add_tail(&cm_dev->list, &cm.device_list); write_unlock_irqrestore(&cm.device_lock, flags); @@ -4398,11 +4402,10 @@ static int cm_add_one(struct ib_device *ib_device) error3: ib_unregister_mad_agent(port->mad_agent); error2: - cm_remove_port_fs(port); + ib_port_unregister_client_groups(ib_device, i, cm_counter_groups); error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; - kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4410,11 +4413,11 @@ error1: port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); - cm_remove_port_fs(port); - kfree(port); + ib_port_unregister_client_groups(ib_device, i, + cm_counter_groups); } free: - kfree(cm_dev); + cm_device_put(cm_dev); return ret; } @@ -4422,8 +4425,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; - struct cm_id_private *cm_id_priv; - struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; @@ -4439,34 +4440,33 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.lock); rdma_for_each_port (ib_device, i) { + struct ib_mad_agent *mad_agent; + if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; + mad_agent = port->mad_agent; ib_modify_port(ib_device, port->port_num, 0, &port_modify); - /* Mark all the cm_id's as not valid */ - spin_lock_irq(&cm.lock); - list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) - cm_id_priv->altr_send_port_not_ready = 1; - list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) - cm_id_priv->prim_send_port_not_ready = 1; - spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); - spin_lock_irq(&cm.state_lock); - cur_mad_agent = port->mad_agent; + /* + * The above ensures no call paths from the work are running, + * the remaining paths all take the mad_agent_lock. + */ + spin_lock(&cm_dev->mad_agent_lock); port->mad_agent = NULL; - spin_unlock_irq(&cm.state_lock); - ib_unregister_mad_agent(cur_mad_agent); - cm_remove_port_fs(port); - kfree(port); + spin_unlock(&cm_dev->mad_agent_lock); + ib_unregister_mad_agent(mad_agent); + ib_port_unregister_client_groups(ib_device, i, + cm_counter_groups); } - kfree(cm_dev); + cm_device_put(cm_dev); } static int __init ib_cm_init(void) @@ -4476,7 +4476,6 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); - spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; |