summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunxian Huang <huangjunxian6@hisilicon.com>2025-07-03 19:39:02 +0800
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-08-15 12:13:53 +0200
commit1209e33fe3afb6d9e543f963d41b30cfc04538ff (patch)
tree92da4acccfef4e7b0f8cf6baa7dc818e3c7e64c8
parent9dea08eac4f6d6fbbae59992978252e2edab995d (diff)
RDMA/hns: Get message length of ack_req from FW
[ Upstream commit 2c2ec0106c0f1f12d4eefd11de318ac47557a750 ] ACK_REQ_FREQ indicates the number of packets (after MTU fragmentation) HW sends before setting an ACK request. When MTU is greater than or equal to 1024, the current ACK_REQ_FREQ value causes HW to request an ACK for every MTU fragment. The processing of a large number of ACKs severely impacts HW performance when sending large size payloads. Get message length of ack_req from FW so that we can adjust this parameter according to different situations. There are several constraints for ACK_REQ_FREQ: 1. mtu * (2 ^ ACK_REQ_FREQ) should not be too large, otherwise it may cause some unexpected retries when sending large payload. 2. ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI. 3. ACK_REQ_FREQ must be equal to LP_PKTN_INI when using LDCP or HC3 congestion control algorithm. Fixes: 56518a603fd2 ("RDMA/hns: Modify the value of long message loopback slice") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> Link: https://patch.msgid.link/20250703113905.3597124-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c45
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h8
3 files changed, 43 insertions, 11 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 560a1d9de408..cbe73d9ad525 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -856,6 +856,7 @@ struct hns_roce_caps {
u16 default_ceq_arm_st;
u8 cong_cap;
enum hns_roce_cong_type default_cong_type;
+ u32 max_ack_req_msg_len;
};
enum hns_roce_device_state {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 6c4e0ea20224..5745231f9e3c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2181,31 +2181,36 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM];
+ struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM] = {};
struct hns_roce_caps *caps = &hr_dev->caps;
struct hns_roce_query_pf_caps_a *resp_a;
struct hns_roce_query_pf_caps_b *resp_b;
struct hns_roce_query_pf_caps_c *resp_c;
struct hns_roce_query_pf_caps_d *resp_d;
struct hns_roce_query_pf_caps_e *resp_e;
+ struct hns_roce_query_pf_caps_f *resp_f;
enum hns_roce_opcode_type cmd;
int ctx_hop_num;
int pbl_hop_num;
+ int cmd_num;
int ret;
int i;
cmd = hr_dev->is_vf ? HNS_ROCE_OPC_QUERY_VF_CAPS_NUM :
HNS_ROCE_OPC_QUERY_PF_CAPS_NUM;
+ cmd_num = hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 :
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM;
- for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) {
+ for (i = 0; i < cmd_num - 1; i++) {
hns_roce_cmq_setup_basic_desc(&desc[i], cmd, true);
- if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1))
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
}
- ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM);
+ hns_roce_cmq_setup_basic_desc(&desc[cmd_num - 1], cmd, true);
+ desc[cmd_num - 1].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ ret = hns_roce_cmq_send(hr_dev, desc, cmd_num);
if (ret)
return ret;
@@ -2214,6 +2219,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data;
resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data;
resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data;
+ resp_f = (struct hns_roce_query_pf_caps_f *)desc[5].data;
caps->local_ca_ack_delay = resp_a->local_ca_ack_delay;
caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
@@ -2278,6 +2284,8 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
+ caps->max_ack_req_msg_len = le32_to_cpu(resp_f->max_ack_req_msg_len);
+
caps->qpc_hop_num = ctx_hop_num;
caps->sccc_hop_num = ctx_hop_num;
caps->srqc_hop_num = ctx_hop_num;
@@ -4559,7 +4567,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
dma_addr_t trrl_ba;
dma_addr_t irrl_ba;
enum ib_mtu ib_mtu;
+ u8 ack_req_freq;
const u8 *smac;
+ int lp_msg_len;
u8 lp_pktn_ini;
u64 *mtts;
u8 *dmac;
@@ -4642,7 +4652,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
return -EINVAL;
#define MIN_LP_MSG_LEN 1024
/* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
- lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu);
+ lp_msg_len = max(mtu, MIN_LP_MSG_LEN);
+ lp_pktn_ini = ilog2(lp_msg_len / mtu);
if (attr_mask & IB_QP_PATH_MTU) {
hr_reg_write(context, QPC_MTU, ib_mtu);
@@ -4652,8 +4663,22 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini);
hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI);
- /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */
- hr_reg_write(context, QPC_ACK_REQ_FREQ, lp_pktn_ini);
+ /*
+ * There are several constraints for ACK_REQ_FREQ:
+ * 1. mtu * (2 ^ ACK_REQ_FREQ) should not be too large, otherwise
+ * it may cause some unexpected retries when sending large
+ * payload.
+ * 2. ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI.
+ * 3. ACK_REQ_FREQ must be equal to LP_PKTN_INI when using LDCP
+ * or HC3 congestion control algorithm.
+ */
+ if (hr_qp->cong_type == CONG_TYPE_LDCP ||
+ hr_qp->cong_type == CONG_TYPE_HC3 ||
+ hr_dev->caps.max_ack_req_msg_len < lp_msg_len)
+ ack_req_freq = lp_pktn_ini;
+ else
+ ack_req_freq = ilog2(hr_dev->caps.max_ack_req_msg_len / mtu);
+ hr_reg_write(context, QPC_ACK_REQ_FREQ, ack_req_freq);
hr_reg_clear(qpc_mask, QPC_ACK_REQ_FREQ);
hr_reg_clear(qpc_mask, QPC_RX_REQ_PSN_ERR);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index bc7466830eaf..1c2660305d27 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1168,7 +1168,8 @@ struct hns_roce_cfg_gmv_tb_b {
#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
-#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 5
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 6
struct hns_roce_query_pf_caps_a {
u8 number_ports;
u8 local_ca_ack_delay;
@@ -1280,6 +1281,11 @@ struct hns_roce_query_pf_caps_e {
__le16 aeq_period;
};
+struct hns_roce_query_pf_caps_f {
+ __le32 max_ack_req_msg_len;
+ __le32 rsv[5];
+};
+
#define PF_CAPS_E_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)