diff options
26 files changed, 1300 insertions, 164 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index a3a4fece0cac..67453dd75b93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -79,6 +79,11 @@ struct page_pool; SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MLX5E_RX_MAX_HEAD (256) +#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9) +#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) +#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64) +#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024) +#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096) #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ @@ -152,6 +157,25 @@ struct page_pool; #define MLX5E_UMR_WQEBBS \ (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) +#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\ + (sizeof(struct mlx5e_umr_wqe) +\ + (sizeof(struct mlx5_klm) * (sgl_len))) + +#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \ + (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB)) + +#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\ + (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS)) + +#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\ + (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm)) + +#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ + ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) + +#define MLX5E_MAX_KLM_PER_WQE(mdev) \ + MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE) + #define MLX5E_MSG_LEVEL NETIF_MSG_LINK #define mlx5e_dbg(mlevel, priv, format, ...) \ @@ -217,7 +241,10 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; - struct mlx5_mtt inline_mtts[0]; + union { + struct mlx5_mtt inline_mtts[0]; + struct mlx5_klm inline_klms[0]; + }; }; enum mlx5e_priv_flag { @@ -242,6 +269,21 @@ enum mlx5e_priv_flag { #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) +enum packet_merge { + MLX5E_PACKET_MERGE_NONE, + MLX5E_PACKET_MERGE_LRO, + MLX5E_PACKET_MERGE_SHAMPO, +}; + +struct mlx5e_packet_merge_param { + enum packet_merge type; + u32 timeout; + struct { + u8 match_criteria_type; + u8 alignment_granularity; + } shampo; +}; + struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; @@ -259,13 +301,12 @@ struct mlx5e_params { bool tunneled_offload_en; struct dim_cq_moder rx_cq_moderation; struct dim_cq_moder tx_cq_moderation; - bool lro_en; + struct mlx5e_packet_merge_param packet_merge; u8 tx_min_inline_mode; bool vlan_strip_disable; bool scatter_fcs_en; bool rx_dim_enabled; bool tx_dim_enabled; - u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; struct mlx5e_xsk *xsk; @@ -287,7 +328,8 @@ enum { MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */ - MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */ + MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ + MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ }; struct mlx5e_cq { @@ -578,6 +620,7 @@ typedef struct sk_buff * struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); +typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool); int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk); void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params); @@ -599,6 +642,25 @@ struct mlx5e_rq_frags_info { u8 wqe_bulk; }; +struct mlx5e_shampo_hd { + struct mlx5_core_mkey mkey; + struct mlx5e_dma_info *info; + struct page *last_page; + u16 hd_per_wq; + u16 hd_per_wqe; + unsigned long *bitmap; + u16 pi; + u16 ci; + __be32 key; + u64 last_addr; +}; + +struct mlx5e_hw_gro_data { + struct sk_buff *skb; + struct flow_keys fk; + int second_ip_id; +}; + struct mlx5e_rq { /* data path */ union { @@ -620,6 +682,7 @@ struct mlx5e_rq { u8 umr_in_progress; u8 umr_last_bulk; u8 umr_completed; + struct mlx5e_shampo_hd *shampo; } mpwqe; }; struct { @@ -639,6 +702,8 @@ struct mlx5e_rq { struct mlx5e_icosq *icosq; struct mlx5e_priv *priv; + struct mlx5e_hw_gro_data *hw_gro_data; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_post_rx_wqes post_wqes; mlx5e_fp_dealloc_wqe dealloc_wqe; @@ -886,6 +951,7 @@ struct mlx5e_priv { struct mlx5e_rx_handlers { mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo; }; extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic; @@ -915,6 +981,7 @@ void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); +void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 3cbb596821e8..f8c29022dbb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -87,7 +87,8 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), mlx5e_rx_get_linear_frag_sz(params, NULL)); - return !params->lro_en && linear_frag_sz <= PAGE_SIZE; + return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE && + linear_frag_sz <= PAGE_SIZE; } bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, @@ -138,6 +139,27 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, return params->log_rq_mtu_frames - log_pkts_per_wqe; } +u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE)); +} + +u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE); +} + +u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * + PAGE_SIZE; + + return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu)); +} + u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -164,19 +186,8 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, mlx5e_rx_is_linear_skb(params, xsk) : mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); - return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0; -} - -struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params) -{ - struct mlx5e_lro_param lro_param; - - lro_param = (struct mlx5e_lro_param) { - .enabled = params->lro_en, - .timeout = params->lro_timeout, - }; - - return lro_param; + return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ? + mlx5e_get_linear_rq_headroom(params, xsk) : 0; } u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -453,6 +464,23 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev, MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } +static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); + int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(params, xsk)); + int wqe_size = BIT(log_stride_sz) * num_strides; + + /* +1 is for the case that the pkt_per_rsrv dont consume the reservation + * so we get a filler cqe for the rest of the reservation. + */ + return order_base_2((wqe_size / rsrv_size) * wq_size * (pkt_per_rsrv + 1)); +} + static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -464,9 +492,12 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk); + else + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -485,10 +516,11 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { + bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO; bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) && MLX5_CAP_GEN(mdev, relaxed_ordering_write); - return ro && params->lro_en ? + return ro && lro_en ? MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; } @@ -520,6 +552,22 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_wqe_stride_size, log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + MLX5_SET(wq, wq, shampo_enable, true); + MLX5_SET(wq, wq, log_reservation_size, + mlx5e_shampo_get_log_rsrv_size(mdev, params)); + MLX5_SET(wq, wq, + log_max_num_of_packets_per_reservation, + mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + MLX5_SET(wq, wq, log_headers_entry_size, + mlx5e_shampo_get_log_hd_entry_size(mdev, params)); + MLX5_SET(rqc, rqc, reservation_timeout, + params->packet_merge.timeout); + MLX5_SET(rqc, rqc, shampo_match_criteria_type, + params->packet_merge.shampo.match_criteria_type); + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, + params->packet_merge.shampo.alignment_granularity); + } break; } default: /* MLX5_WQ_TYPE_CYCLIC */ @@ -620,17 +668,80 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc) return MLX5_GET(wq, wq, log_wq_sz); } -static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, +/* This function calculates the maximum number of headers entries that are needed + * per WQE, the formula is based on the size of the reservations and the + * restriction we have about max packets for reservation that is equal to max + * headers per reservation. + */ +u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; + u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL)); + int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL); + int wqe_size = BIT(log_stride_sz) * num_strides; + u32 hd_per_wqe; + + /* Assumption: hd_per_wqe % 8 == 0. */ + hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv; + mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n", + __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv); + return hd_per_wqe; +} + +/* This function calculates the maximum number of headers entries that are needed + * for the WQ, this value is uesed to allocate the header buffer in HW, thus + * must be a pow of 2. + */ +u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); + int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + u32 hd_per_wqe, hd_per_wq; + + hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); + hd_per_wq = roundup_pow_of_two(hd_per_wqe * wq_size); + return hd_per_wq; +} + +static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest; + void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); + int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + u32 wqebbs; + + max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); + max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); + max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; + rest = max_hd_per_wqe % max_klm_per_umr; + wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe; + if (rest) + wqebbs += MLX5E_KLM_UMR_WQEBBS(rest); + wqebbs *= wq_size; + return wqebbs; +} + +static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, struct mlx5e_rq_param *rqp) { - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, - order_base_2(MLX5E_UMR_WQEBBS) + - mlx5e_get_rq_log_wq_sz(rqp->rqc)); - default: /* MLX5_WQ_TYPE_CYCLIC */ + u32 wqebbs; + + /* MLX5_WQ_TYPE_CYCLIC */ + if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - } + + wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc)); + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp); + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs)); } static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev) @@ -697,7 +808,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, if (err) return err; - icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); + icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(mdev, params, &cparam->rq); async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 879ad46d754e..433e6967692d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -11,11 +11,6 @@ struct mlx5e_xsk_param { u16 chunk_size; }; -struct mlx5e_lro_param { - bool enabled; - u32 timeout; -}; - struct mlx5e_cq_param { u32 cqc[MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; @@ -116,6 +111,18 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); +u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param); +u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); @@ -125,7 +132,6 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params); /* Build queue parameters */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index b8b481b335cf..c1cdd8c2e37a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -127,7 +127,7 @@ mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, - const struct mlx5e_lro_param *init_lro_param, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, bool inner) { struct mlx5e_rss_params_traffic_type rss_tt; @@ -161,7 +161,7 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, rqtn = mlx5e_rqt_get_rqtn(&rss->rqt); mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn, rqtn, rss->inner_ft_support); - mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); rss_tt = mlx5e_rss_get_tt_config(rss, tt); mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); @@ -198,14 +198,14 @@ static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types } static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss, - const struct mlx5e_lro_param *init_lro_param, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, bool inner) { enum mlx5_traffic_types tt, max_tt; int err; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner); + err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); if (err) goto err_destroy_tirs; } @@ -297,7 +297,7 @@ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, bool inner_ft_support, u32 drop_rqn, - const struct mlx5e_lro_param *init_lro_param) + const struct mlx5e_packet_merge_param *init_pkt_merge_param) { int err; @@ -305,12 +305,12 @@ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, if (err) goto err_out; - err = mlx5e_rss_create_tirs(rss, init_lro_param, false); + err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false); if (err) goto err_destroy_rqt; if (inner_ft_support) { - err = mlx5e_rss_create_tirs(rss, init_lro_param, true); + err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true); if (err) goto err_destroy_tirs; } @@ -372,7 +372,7 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, */ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, - const struct mlx5e_lro_param *init_lro_param, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, bool inner, u32 *tirn) { struct mlx5e_tir *tir; @@ -381,7 +381,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, if (!tir) { /* TIR doesn't exist, create one */ int err; - err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner); + err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); if (err) return err; tir = rss_get_tir(rss, tt, inner); @@ -419,7 +419,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss) mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err); } -int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param) +int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, + struct mlx5e_packet_merge_param *pkt_merge_param) { struct mlx5e_tir_builder *builder; enum mlx5_traffic_types tt; @@ -429,7 +430,7 @@ int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_p if (!builder) return -ENOMEM; - mlx5e_tir_builder_build_lro(builder, lro_param); + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); final_err = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index d522a10dadf3..c6b216416344 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -17,7 +17,7 @@ struct mlx5e_rss *mlx5e_rss_alloc(void); void mlx5e_rss_free(struct mlx5e_rss *rss); int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, bool inner_ft_support, u32 drop_rqn, - const struct mlx5e_lro_param *init_lro_param); + const struct mlx5e_packet_merge_param *init_pkt_merge_param); int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev, bool inner_ft_support, u32 drop_rqn); int mlx5e_rss_cleanup(struct mlx5e_rss *rss); @@ -30,13 +30,14 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, - const struct mlx5e_lro_param *init_lro_param, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, bool inner, u32 *tirn); void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns); void mlx5e_rss_disable(struct mlx5e_rss *rss); -int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param); +int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, + struct mlx5e_packet_merge_param *pkt_merge_param); int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc); int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, const u8 *key, const u8 *hfunc, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 13056cb9757d..142953847996 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -34,7 +34,7 @@ struct mlx5e_rx_res { /* API for rx_res_rss_* */ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, - const struct mlx5e_lro_param *init_lro_param, + const struct mlx5e_packet_merge_param *init_pkt_merge_param, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; @@ -49,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, return -ENOMEM; err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn, - init_lro_param); + init_pkt_merge_param); if (err) goto err_rss_free; @@ -275,7 +275,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) } static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, - const struct mlx5e_lro_param *init_lro_param) + const struct mlx5e_packet_merge_param *init_pkt_merge_param) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; struct mlx5e_tir_builder *builder; @@ -306,7 +306,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), inner_ft_support); - mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); @@ -336,7 +336,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), inner_ft_support); - mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true); @@ -437,7 +437,7 @@ static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res) int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, enum mlx5e_rx_res_features features, unsigned int max_nch, - u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param, unsigned int init_nch) { int err; @@ -447,11 +447,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, res->max_nch = max_nch; res->drop_rqn = drop_rqn; - err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch); + err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch); if (err) goto err_out; - err = mlx5e_rx_res_channels_init(res, init_lro_param); + err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param); if (err) goto err_rss_destroy; @@ -645,7 +645,8 @@ int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix) return err; } -int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param) +int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, + struct mlx5e_packet_merge_param *pkt_merge_param) { struct mlx5e_tir_builder *builder; int err, final_err; @@ -655,7 +656,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param if (!builder) return -ENOMEM; - mlx5e_tir_builder_build_lro(builder, lro_param); + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); final_err = 0; @@ -665,7 +666,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param if (!rss) continue; - err = mlx5e_rss_lro_set_param(rss, lro_param); + err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param); if (err) final_err = final_err ? : err; } @@ -673,7 +674,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param for (ix = 0; ix < res->max_nch; ix++) { err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); if (err) { - mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n", + mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n", mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err); if (!final_err) final_err = err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 4a15942d79f7..d09f7d174a51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -25,7 +25,7 @@ enum mlx5e_rx_res_features { struct mlx5e_rx_res *mlx5e_rx_res_alloc(void); int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, enum mlx5e_rx_res_features features, unsigned int max_nch, - u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param, unsigned int init_nch); void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res); void mlx5e_rx_res_free(struct mlx5e_rx_res *res); @@ -57,7 +57,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, u8 rx_hash_fields); -int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param); +int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, + struct mlx5e_packet_merge_param *pkt_merge_param); int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch); int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c index de936dc4bc48..da169b816665 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -70,24 +70,30 @@ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support); } -void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder, - const struct mlx5e_lro_param *lro_param) +void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder, + const struct mlx5e_packet_merge_param *pkt_merge_param) { void *tirc = mlx5e_tir_builder_get_tirc(builder); const unsigned int rough_max_l2_l3_hdr_sz = 256; if (builder->modify) - MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1); - - if (!lro_param->enabled) - return; - - MLX5_SET(tirc, tirc, lro_enable_mask, - MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | - MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); - MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout); + MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1); + + switch (pkt_merge_param->type) { + case MLX5E_PACKET_MERGE_LRO: + MLX5_SET(tirc, tirc, packet_merge_mask, + MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO | + MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout); + break; + case MLX5E_PACKET_MERGE_SHAMPO: + MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO); + break; + default: + break; + } } static int mlx5e_hfunc_to_hw(u8 hfunc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h index e45149a78ed9..857a84bcd53a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h @@ -18,7 +18,7 @@ struct mlx5e_rss_params_traffic_type { }; struct mlx5e_tir_builder; -struct mlx5e_lro_param; +struct mlx5e_packet_merge_param; struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify); void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder); @@ -27,8 +27,8 @@ void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder); void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn); void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqtn, bool inner_ft_support); -void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder, - const struct mlx5e_lro_param *lro_param); +void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder, + const struct mlx5e_packet_merge_param *pkt_merge_param); void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, const struct mlx5e_rss_params_hash *rss_hash, const struct mlx5e_rss_params_traffic_type *rss_tt, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 055c3bc23733..4cdf8e5b24c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -36,6 +36,7 @@ ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ enum mlx5e_icosq_wqe_type { MLX5E_ICOSQ_WQE_NOP, MLX5E_ICOSQ_WQE_UMR_RX, + MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR, #ifdef CONFIG_MLX5_EN_TLS MLX5E_ICOSQ_WQE_UMR_TLS, MLX5E_ICOSQ_WQE_SET_PSV_TLS, @@ -166,6 +167,10 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) return pi; } +struct mlx5e_shampo_umr { + u16 len; +}; + struct mlx5e_icosq_wqe_info { u8 wqe_type; u8 num_wqebbs; @@ -175,6 +180,7 @@ struct mlx5e_icosq_wqe_info { struct { struct mlx5e_rq *rq; } umr; + struct mlx5e_shampo_umr shampo; #ifdef CONFIG_MLX5_EN_TLS struct { struct mlx5e_ktls_offload_context_rx *priv_rx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 25926e581d18..c2ea5fad48dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1900,6 +1900,11 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return -EINVAL; } + if (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + netdev_warn(priv->netdev, "Can't set CQE compression with HW-GRO, disable it first.\n"); + return -EINVAL; + } + new_params = priv->channels.params; MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); if (rx_filter) @@ -1952,8 +1957,8 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return -EOPNOTSUPP; if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params)) return -EINVAL; - } else if (priv->channels.params.lro_en) { - netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n"); + } else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 81ebf281cdb4..ad0d234632a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -411,7 +411,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv, u32 rss_context, u32 *tirn) { if (fs->flow_type & FLOW_RSS) { - struct mlx5e_lro_param lro_param; + struct mlx5e_packet_merge_param pkt_merge_param; struct mlx5e_rss *rss; u32 flow_type; int err; @@ -426,8 +426,8 @@ static int flow_get_tirn(struct mlx5e_priv *priv, if (tt < 0) return -EINVAL; - lro_param = mlx5e_get_lro_param(&priv->channels.params); - err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn); + pkt_merge_param = priv->channels.params.packet_merge; + err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn); if (err) return err; eth_rule->rss = rss; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f3dec58026d9..6f398f636f01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -218,6 +218,45 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } +static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node) +{ + rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), + GFP_KERNEL, node); + if (!rq->mpwqe.shampo) + return -ENOMEM; + return 0; +} + +static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo); +} + +static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + + shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, + node); + if (!shampo->bitmap) + return -ENOMEM; + + shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq, + sizeof(*shampo->info)), + GFP_KERNEL, node); + if (!shampo->info) { + kvfree(shampo->bitmap); + return -ENOMEM; + } + return 0; +} + +static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo->bitmap); + kvfree(rq->mpwqe.shampo->info); +} + static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); @@ -233,10 +272,10 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) return 0; } -static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, - u64 npages, u8 page_shift, - struct mlx5_core_mkey *umr_mkey, - dma_addr_t filler_addr) +static int mlx5e_create_umr_mtt_mkey(struct mlx5_core_dev *mdev, + u64 npages, u8 page_shift, + struct mlx5_core_mkey *umr_mkey, + dma_addr_t filler_addr) { struct mlx5_mtt *mtt; int inlen; @@ -284,12 +323,59 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, return err; } +static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, + u64 nentries, + struct mlx5_core_mkey *umr_mkey) +{ + int inlen; + void *mkc; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); + mlx5e_mkey_set_relaxed_ordering(mdev, mkc); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(mkc, mkc, translations_octword_size, nentries); + MLX5_SET(mkc, mkc, length64, 1); + err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); + + kvfree(in); + return err; +} + static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq)); - return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey, - rq->wqe_overflow.addr); + return mlx5e_create_umr_mtt_mkey(mdev, num_mtts, PAGE_SHIFT, + &rq->umr_mkey, rq->wqe_overflow.addr); +} + +static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq) +{ + u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + + if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) { + mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", + max_klm_size, rq->mpwqe.shampo->hd_per_wq); + return -EINVAL; + } + return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, + &rq->mpwqe.shampo->mkey); } static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix) @@ -403,6 +489,65 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); } +static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp, + struct mlx5e_rq *rq, + u32 *pool_size, + int node) +{ + void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + int wq_size; + int err; + + if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + return 0; + err = mlx5e_rq_shampo_hd_alloc(rq, node); + if (err) + goto out; + rq->mpwqe.shampo->hd_per_wq = + mlx5e_shampo_hd_per_wq(mdev, params, rqp); + err = mlx5e_create_rq_hd_umr_mkey(mdev, rq); + if (err) + goto err_shampo_hd; + err = mlx5e_rq_shampo_hd_info_alloc(rq, node); + if (err) + goto err_shampo_info; + rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); + if (!rq->hw_gro_data) { + err = -ENOMEM; + goto err_hw_gro_data; + } + rq->mpwqe.shampo->key = + cpu_to_be32(rq->mpwqe.shampo->mkey.key); + rq->mpwqe.shampo->hd_per_wqe = + mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) / + MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + return 0; + +err_hw_gro_data: + mlx5e_rq_shampo_hd_info_free(rq); +err_shampo_info: + mlx5_core_destroy_mkey(mdev, &rq->mpwqe.shampo->mkey); +err_shampo_hd: + mlx5e_rq_shampo_hd_free(rq); +out: + return err; +} + +static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) +{ + if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + return; + + kvfree(rq->hw_gro_data); + mlx5e_rq_shampo_hd_info_free(rq); + mlx5_core_destroy_mkey(rq->mdev, &rq->mpwqe.shampo->mkey); + mlx5e_rq_shampo_hd_free(rq); +} + static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5e_rq_param *rqp, @@ -460,6 +605,11 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, err = mlx5e_rq_alloc_mpwqe_info(rq, node); if (err) goto err_rq_mkey; + + err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node); + if (err) + goto err_free_by_rq_type; + break; default: /* MLX5_WQ_TYPE_CYCLIC */ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, @@ -512,14 +662,14 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (IS_ERR(rq->page_pool)) { err = PTR_ERR(rq->page_pool); rq->page_pool = NULL; - goto err_free_by_rq_type; + goto err_free_shampo; } if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); } if (err) - goto err_free_by_rq_type; + goto err_free_shampo; for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -528,8 +678,10 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, u32 byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; u64 dma_offset = mlx5e_get_mpwqe_offset(i); + u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ? + 0 : rq->buff.headroom; - wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom); + wqe->data[0].addr = cpu_to_be64(dma_offset + headroom); wqe->data[0].byte_count = cpu_to_be32(byte_count); wqe->data[0].lkey = rq->mkey_be; } else { @@ -569,6 +721,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, return 0; +err_free_shampo: + mlx5e_rq_free_shampo(rq); err_free_by_rq_type: switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -609,6 +763,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) kvfree(rq->mpwqe.info); mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); mlx5e_free_mpwqe_rq_drop_page(rq); + mlx5e_rq_free_shampo(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ kvfree(rq->wqe.frags); @@ -662,6 +817,12 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + MLX5_SET(wq, wq, log_headers_buffer_entry_num, + order_base_2(rq->mpwqe.shampo->hd_per_wq)); + MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey.key); + } + mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); @@ -801,6 +962,15 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) head = mlx5_wq_ll_get_wqe_next_ix(wq, head); } + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + u16 len; + + len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) & + (rq->mpwqe.shampo->hd_per_wq - 1); + mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false); + rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci; + } + rq->mpwqe.actual_wq_head = wq->head; rq->mpwqe.umr_in_progress = 0; rq->mpwqe.umr_completed = 0; @@ -826,6 +996,10 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) mlx5_wq_ll_pop(wq, wqe_ix_be, &wqe->next.next_wqe_index); } + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq, + 0, true); } else { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -845,6 +1019,9 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5_core_dev *mdev = rq->mdev; int err; + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state); + err = mlx5e_alloc_rq(params, xsk, param, node, rq); if (err) return err; @@ -2222,17 +2399,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) chs->num = 0; } -static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) +static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv) { struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_lro_param lro_param; - - lro_param = mlx5e_get_lro_param(&priv->channels.params); - return mlx5e_rx_res_lro_set_param(res, &lro_param); + return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge); } -static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro); +static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge); static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 mtu) @@ -3351,16 +3525,59 @@ static int set_feature_lro(struct net_device *netdev, bool enable) } new_params = *cur_params; - new_params.lro_en = enable; - if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) - reset = false; + if (enable) + new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO; + else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO) + new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; + else + goto out; + + if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO && + new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) { + if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) + reset = false; + } + } + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_modify_tirs_packet_merge_ctx, NULL, reset); +out: + mutex_unlock(&priv->state_lock); + return err; +} + +static int set_feature_hw_gro(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; + bool reset = true; + int err = 0; + + mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + + if (enable) { + if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n"); + err = -EINVAL; + goto out; + } + new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; + new_params.packet_merge.shampo.match_criteria_type = + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; + new_params.packet_merge.shampo.alignment_granularity = + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE; + } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; + } else { + goto out; } err = mlx5e_safe_switch_params(priv, &new_params, - mlx5e_modify_tirs_lro_ctx, NULL, reset); + mlx5e_modify_tirs_packet_merge_ctx, NULL, reset); out: mutex_unlock(&priv->state_lock); return err; @@ -3539,6 +3756,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); + err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); @@ -3599,6 +3817,10 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); features &= ~NETIF_F_LRO; } + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_GRO_HW; + } } if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { @@ -3687,7 +3909,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, goto out; } - if (params->lro_en) + if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO) reset = false; if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -4144,8 +4366,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) struct net_device *netdev = priv->netdev; struct mlx5e_params new_params; - if (priv->channels.params.lro_en) { - netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n"); return -EINVAL; } @@ -4402,9 +4624,10 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { /* No XSK params: checking the availability of striding RQ in general. */ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - params->lro_en = !slow_pci_heuristic(mdev); + params->packet_merge.type = slow_pci_heuristic(mdev) ? + MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO; } - params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? @@ -4539,6 +4762,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + if (!!MLX5_CAP_GEN(mdev, shampo) && + mlx5e_check_fragmented_striding_rq_cap(mdev)) + netdev->hw_features |= NETIF_F_GRO_HW; + if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; @@ -4589,6 +4816,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (fcs_enabled) netdev->features &= ~NETIF_F_RXALL; netdev->features &= ~NETIF_F_LRO; + netdev->features &= ~NETIF_F_GRO_HW; netdev->features &= ~NETIF_F_RXFCS; #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) @@ -4693,7 +4921,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; enum mlx5e_rx_res_features features; - struct mlx5e_lro_param lro_param; int err; priv->rx_res = mlx5e_rx_res_alloc(); @@ -4711,9 +4938,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP; if (priv->channels.params.tunneled_offload_en) features |= MLX5E_RX_RES_FEATURE_INNER_FT; - lro_param = mlx5e_get_lro_param(&priv->channels.params); err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, - priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, priv->channels.params.num_channels); if (err) goto err_close_drop_rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0684ac6699b2..5230e0422cae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -793,7 +793,6 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_lro_param lro_param; int err; priv->rx_res = mlx5e_rx_res_alloc(); @@ -808,9 +807,9 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) return err; } - lro_param = mlx5e_get_lro_param(&priv->channels.params); err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, - priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, priv->channels.params.num_channels); if (err) goto err_close_drop_rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 29a6586ef28d..fe979edd96dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -33,9 +33,12 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/bitmap.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> +#include <net/udp.h> +#include <net/tcp.h> #include "en.h" #include "en/txrx.h" #include "en_tc.h" @@ -62,10 +65,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w u16 cqe_bcnt, u32 head_offset, u32 page_idx); static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = { .handle_rx_cqe = mlx5e_handle_rx_cqe, .handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo, }; static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) @@ -185,8 +190,9 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, mlx5e_read_mini_arr_slot(wq, cqd, cqcc); mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); - INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, - mlx5e_handle_rx_cqe, rq, &cqd->title); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe, + rq, &cqd->title); } mlx5e_cqes_update_owner(wq, cqcc - wq->cc); wq->cc = cqcc; @@ -206,8 +212,9 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, mlx5e_read_title_slot(rq, wq, cc); mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); mlx5e_decompress_cqe(rq, wq, cc); - INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, - mlx5e_handle_rx_cqe, rq, &cqd->title); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe, + rq, &cqd->title); cqd->mini_arr_idx++; return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; @@ -448,13 +455,13 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, static inline void mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, struct mlx5e_dma_info *dma_info, - int offset_from, u32 headlen) + int offset_from, int dma_offset, u32 headlen) { const void *from = page_address(dma_info->page) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ unsigned int len = ALIGN(headlen, sizeof(long)); - dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len, + dma_sync_single_for_cpu(pdev, dma_info->addr + dma_offset, len, DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, from, len); } @@ -494,6 +501,157 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) mlx5_wq_ll_update_db_record(wq); } +/* This function returns the size of the continuous free space inside a bitmap + * that starts from first and no longer than len including circular ones. + */ +static int bitmap_find_window(unsigned long *bitmap, int len, + int bitmap_size, int first) +{ + int next_one, count; + + next_one = find_next_bit(bitmap, bitmap_size, first); + if (next_one == bitmap_size) { + if (bitmap_size - first >= len) + return len; + next_one = find_next_bit(bitmap, bitmap_size, 0); + count = next_one + bitmap_size - first; + } else { + count = next_one - first; + } + + return min(len, count); +} + +static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, + __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs) +{ + memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms)); + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + umr_wqe->ctrl.umr_mkey = key; + umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) + | MLX5E_KLM_UMR_DS_CNT(klm_len)); + umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len); + umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); +} + +static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + u16 klm_entries, u16 index) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 entries, pi, i, header_offset, err, wqe_bbs, new_entries; + u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey.key; + struct page *page = shampo->last_page; + u64 addr = shampo->last_addr; + struct mlx5e_dma_info *dma_info; + struct mlx5e_umr_wqe *umr_wqe; + int headroom; + + headroom = rq->buff.headroom; + new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); + entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT); + wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries); + pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); + umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); + + for (i = 0; i < entries; i++, index++) { + dma_info = &shampo->info[index]; + if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < + MLX5_UMR_KLM_ALIGNMENT)) + goto update_klm; + header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << + MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + if (!(header_offset & (PAGE_SIZE - 1))) { + err = mlx5e_page_alloc(rq, dma_info); + if (unlikely(err)) + goto err_unmap; + addr = dma_info->addr; + page = dma_info->page; + } else { + dma_info->addr = addr + header_offset; + dma_info->page = page; + } + +update_klm: + umr_wqe->inline_klms[i].bcount = + cpu_to_be32(MLX5E_RX_MAX_HEAD); + umr_wqe->inline_klms[i].key = cpu_to_be32(lkey); + umr_wqe->inline_klms[i].va = + cpu_to_be64(dma_info->addr + headroom); + } + + sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR, + .num_wqebbs = wqe_bbs, + .shampo.len = new_entries, + }; + + shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1); + shampo->last_page = page; + shampo->last_addr = addr; + sq->pc += wqe_bbs; + sq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_unmap: + while (--i >= 0) { + if (--index < 0) + index = shampo->hd_per_wq - 1; + dma_info = &shampo->info[index]; + if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { + dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); + mlx5e_page_release(rq, dma_info, true); + } + } + rq->stats->buff_alloc_err++; + return err; +} + +static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 klm_entries, num_wqe, index, entries_before; + struct mlx5e_icosq *sq = rq->icosq; + int i, err, max_klm_entries, len; + + max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); + klm_entries = bitmap_find_window(shampo->bitmap, + shampo->hd_per_wqe, + shampo->hd_per_wq, shampo->pi); + if (!klm_entries) + return 0; + + klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); + index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT); + entries_before = shampo->hd_per_wq - index; + + if (unlikely(entries_before < klm_entries)) + num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) + + DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries); + else + num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries); + + for (i = 0; i < num_wqe; i++) { + len = (klm_entries > max_klm_entries) ? max_klm_entries : + klm_entries; + if (unlikely(index + len > shampo->hd_per_wq)) + len = shampo->hd_per_wq - index; + err = mlx5e_build_shampo_hd_umr(rq, sq, len, index); + if (unlikely(err)) + return err; + index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1); + klm_entries -= len; + } + + return 0; +} + static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; @@ -514,6 +672,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) goto err; } + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + err = mlx5e_alloc_rx_hd_mpwqe(rq); + if (unlikely(err)) + goto err; + } + pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS); umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); @@ -558,6 +722,44 @@ err: return err; } +/* This function is responsible to dealloc SHAMPO header buffer. + * close == true specifies that we are in the middle of closing RQ operation so + * we go over all the entries and if they are not in use we free them, + * otherwise we only go over a specific range inside the header buffer that are + * not in use. + */ +void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + int hd_per_wq = shampo->hd_per_wq; + struct page *deleted_page = NULL; + struct mlx5e_dma_info *hd_info; + int i, index = start; + + for (i = 0; i < len; i++, index++) { + if (index == hd_per_wq) + index = 0; + + if (close && !test_bit(index, shampo->bitmap)) + continue; + + hd_info = &shampo->info[index]; + hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE); + if (hd_info->page != deleted_page) { + deleted_page = hd_info->page; + mlx5e_page_release(rq, hd_info, false); + } + } + + if (start + len > hd_per_wq) { + len -= hd_per_wq - start; + bitmap_clear(shampo->bitmap, start, hd_per_wq - start); + start = 0; + } + + bitmap_clear(shampo->bitmap, start, len); +} + static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; @@ -629,6 +831,28 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) sq->cc = sqcc; } +static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, + struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); + struct mlx5e_shampo_hd *shampo; + /* assume 1:1 relationship between RQ and icosq */ + struct mlx5e_rq *rq = &c->rq; + int end, from, len = umr.len; + + shampo = rq->mpwqe.shampo; + end = shampo->hd_per_wq; + from = shampo->ci; + if (from + len > shampo->hd_per_wq) { + len -= end - from; + bitmap_set(shampo->bitmap, from, end - from); + from = 0; + } + + bitmap_set(shampo->bitmap, from, len); + shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1); +} + int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); @@ -685,6 +909,9 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) break; case MLX5E_ICOSQ_WQE_NOP: break; + case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR: + mlx5e_handle_shampo_hd_umr(wi->shampo, sq); + break; #ifdef CONFIG_MLX5_EN_TLS case MLX5E_ICOSQ_WQE_UMR_TLS: break; @@ -782,8 +1009,8 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) if (tcp_ack) { tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; + tcp->ack_seq = cqe->lro.ack_seq_num; + tcp->window = cqe->lro.tcp_win; } } @@ -809,7 +1036,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, tcp = ip_p + sizeof(struct iphdr); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - ipv4->ttl = cqe->lro_min_ttl; + ipv4->ttl = cqe->lro.min_ttl; ipv4->tot_len = cpu_to_be16(tot_len); ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, @@ -829,7 +1056,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, tcp = ip_p + sizeof(struct ipv6hdr); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - ipv6->hop_limit = cqe->lro_min_ttl; + ipv6->hop_limit = cqe->lro.min_ttl; ipv6->payload_len = cpu_to_be16(payload_len); mlx5e_lro_update_tcp_hdr(cqe, tcp); @@ -841,6 +1068,142 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, } } +static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) +{ + struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index]; + u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom; + + return page_address(last_head->page) + head_offset; +} + +static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) +{ + int udp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + udp_off); + uh->len = htons(skb->len - udp_off); + + if (uh->check) + uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr, + ipv4->daddr, 0); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; +} + +static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6) +{ + int udp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct udphdr *uh; + + uh = (struct udphdr *)(skb->data + udp_off); + uh->len = htons(skb->len - udp_off); + + if (uh->check) + uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr, + &ipv6->daddr, 0); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; +} + +static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + struct tcphdr *skb_tcp_hd) +{ + u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + struct tcphdr *last_tcp_hd; + void *last_hd_addr; + + last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index); + last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff; + tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH); +} + +static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4, + struct mlx5_cqe64 *cqe, bool match) +{ + int tcp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct tcphdr *tcp; + + tcp = (struct tcphdr *)(skb->data + tcp_off); + if (match) + mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp); + + tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr, + ipv4->daddr, 0); + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + + skb->csum_start = (unsigned char *)tcp - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (tcp->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; +} + +static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6, + struct mlx5_cqe64 *cqe, bool match) +{ + int tcp_off = rq->hw_gro_data->fk.control.thoff; + struct sk_buff *skb = rq->hw_gro_data->skb; + struct tcphdr *tcp; + + tcp = (struct tcphdr *)(skb->data + tcp_off); + if (match) + mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp); + + tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr, + &ipv6->daddr, 0); + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + skb->csum_start = (unsigned char *)tcp - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + + if (tcp->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; +} + +static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) +{ + bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)); + struct sk_buff *skb = rq->hw_gro_data->skb; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + skb->ip_summed = CHECKSUM_PARTIAL; + + if (is_ipv4) { + int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr); + struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff); + __be16 newlen = htons(skb->len - nhoff); + + csum_replace2(&ipv4->check, ipv4->tot_len, newlen); + ipv4->tot_len = newlen; + + if (ipv4->protocol == IPPROTO_TCP) + mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match); + else + mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4); + } else { + int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr); + struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff); + + ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6)); + + if (ipv6->nexthdr == IPPROTO_TCP) + mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match); + else + mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6); + } +} + static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { @@ -1090,6 +1453,27 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, stats->mcast_packets++; } +static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct mlx5e_rq_stats *stats = rq->stats; + + stats->packets++; + stats->gro_packets++; + stats->bytes += cqe_bcnt; + stats->gro_bytes += cqe_bcnt; + if (NAPI_GRO_CB(skb)->count != 1) + return; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + skb_reset_network_header(skb); + if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) { + napi_gro_receive(rq->cq.napi, skb); + rq->hw_gro_data->skb = NULL; + } +} + static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u32 cqe_bcnt, @@ -1199,7 +1583,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset, + headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1395,6 +1780,30 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = { }; #endif +static void +mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_info *di, + u32 data_bcnt, u32 data_offset) +{ + net_prefetchw(skb->data); + + while (data_bcnt) { + u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt); + unsigned int truesize; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + truesize = pg_consumed_bytes; + else + truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); + + mlx5e_add_skb_frag(rq, skb, di, data_offset, + pg_consumed_bytes, truesize); + + data_bcnt -= pg_consumed_bytes; + data_offset = 0; + di++; + } +} + static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, u16 cqe_bcnt, u32 head_offset, u32 page_idx) @@ -1420,20 +1829,9 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w frag_offset -= PAGE_SIZE; } - while (byte_cnt) { - u32 pg_consumed_bytes = - min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - unsigned int truesize = - ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - - mlx5e_add_skb_frag(rq, skb, di, frag_offset, - pg_consumed_bytes, truesize); - byte_cnt -= pg_consumed_bytes; - frag_offset = 0; - di++; - } + mlx5e_fill_skb_data(skb, rq, di, byte_cnt, frag_offset); /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1487,6 +1885,181 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return skb; } +static void +mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 header_index) +{ + struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index]; + u16 head_offset = head->addr & (PAGE_SIZE - 1); + u16 head_size = cqe->shampo.header_size; + u16 rx_headroom = rq->buff.headroom; + struct sk_buff *skb = NULL; + void *hdr, *data; + u32 frag_size; + + hdr = page_address(head->page) + head_offset; + data = hdr + rx_headroom; + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); + + if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { + /* build SKB around header */ + dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE); + prefetchw(hdr); + prefetch(data); + skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size); + + if (unlikely(!skb)) + return; + + /* queue up for recycling/reuse */ + page_ref_inc(head->page); + + } else { + /* allocate SKB and copy header for large header */ + rq->stats->gro_large_hds++; + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(head_size, sizeof(long))); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return; + } + + prefetchw(skb->data); + mlx5e_copy_skb_header(rq->pdev, skb, head, + head_offset + rx_headroom, + rx_headroom, head_size); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += head_size; + skb->len += head_size; + } + rq->hw_gro_data->skb = skb; + NAPI_GRO_CB(skb)->count = 1; + skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size; +} + +static void +mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz) +{ + skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1]; + unsigned int frag_size = skb_frag_size(last_frag); + unsigned int frag_truesize; + + frag_truesize = ALIGN(frag_size, BIT(log_stride_sz)); + skb->truesize += frag_truesize - frag_size; +} + +static void +mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) +{ + struct sk_buff *skb = rq->hw_gro_data->skb; + struct mlx5e_rq_stats *stats = rq->stats; + + stats->gro_skbs++; + if (likely(skb_shinfo(skb)->nr_frags)) + mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz); + if (NAPI_GRO_CB(skb)->count > 1) + mlx5e_shampo_update_hdr(rq, cqe, match); + napi_gro_receive(rq->cq.napi, skb); + rq->hw_gro_data->skb = NULL; +} + +static bool +mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) +{ + int nr_frags = skb_shinfo(skb)->nr_frags; + + return PAGE_SIZE * nr_frags + data_bcnt <= GSO_MAX_SIZE; +} + +static void +mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u64 addr = shampo->info[header_index].addr; + + if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { + shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE); + mlx5e_page_release(rq, &shampo->info[header_index], true); + } + bitmap_clear(shampo->bitmap, header_index, 1); +} + +static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size; + u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset); + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u32 data_offset = wqe_offset & (PAGE_SIZE - 1); + u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + struct sk_buff **skb = &rq->hw_gro_data->skb; + bool flush = cqe->shampo.flush; + bool match = cqe->shampo.match; + struct mlx5e_rq_stats *stats = rq->stats; + struct mlx5e_rx_wqe_ll *wqe; + struct mlx5e_dma_info *di; + struct mlx5e_mpw_info *wi; + struct mlx5_wq_ll *wq; + + wi = &rq->mpwqe.info[wqe_id]; + wi->consumed_strides += cstrides; + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + trigger_report(rq, cqe); + stats->wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + stats->mpwqe_filler_cqes++; + stats->mpwqe_filler_strides += cstrides; + goto mpwrq_cqe_out; + } + + stats->gro_match_packets += match; + + if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) { + match = false; + mlx5e_shampo_flush_skb(rq, cqe, match); + } + + if (!*skb) { + mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + if (unlikely(!*skb)) + goto free_hd_entry; + } else { + NAPI_GRO_CB(*skb)->count++; + if (NAPI_GRO_CB(*skb)->count == 2 && + rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) { + void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index); + int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff - + sizeof(struct iphdr); + struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff); + + rq->hw_gro_data->second_ip_id = ntohs(iph->id); + } + } + + di = &wi->umr.dma_info[page_idx]; + mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset); + + mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); + if (flush) + mlx5e_shampo_flush_skb(rq, cqe, match); +free_hd_entry: + mlx5e_free_rx_shampo_hd_entry(rq, header_index); +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) + return; + + wq = &rq->mpwqe.wq; + wqe = mlx5_wq_ll_get_wqe(wq, wqe_id); + mlx5e_free_rx_mpwqe(rq, wi, true); + mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index); +} + static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); @@ -1579,11 +2152,15 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(cqwq); - INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, - mlx5e_handle_rx_cqe, rq, cqe); + INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo, + rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb) + mlx5e_shampo_flush_skb(rq, NULL, false); + if (rcu_access_pointer(rq->xdp_prog)) mlx5e_xdp_rx_poll_complete(rq); @@ -1784,15 +2361,24 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; if (mlx5_fpga_is_ipsec_device(mdev)) { netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n"); return -EINVAL; } - if (!rq->handle_rx_cqe) { - netdev_err(netdev, "RX handler of MPWQE RQ is not set\n"); - return -EINVAL; + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n"); + return -EINVAL; + } + } else { + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; + if (!rq->handle_rx_cqe) { + netdev_err(netdev, "RX handler of MPWQE RQ is not set\n"); + return -EINVAL; + } } + break; default: /* MLX5_WQ_TYPE_CYCLIC */ rq->wqe.skb_from_cqe = xsk ? diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index e1dd17019030..2a9bfc3ffa2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -128,6 +128,11 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_skbs) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_match_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_gro_large_hds) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, @@ -313,6 +318,11 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_bytes += rq_stats->bytes; s->rx_lro_packets += rq_stats->lro_packets; s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_gro_packets += rq_stats->gro_packets; + s->rx_gro_bytes += rq_stats->gro_bytes; + s->rx_gro_skbs += rq_stats->gro_skbs; + s->rx_gro_match_packets += rq_stats->gro_match_packets; + s->rx_gro_large_hds += rq_stats->gro_large_hds; s->rx_ecn_mark += rq_stats->ecn_mark; s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; @@ -1760,6 +1770,11 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_skbs) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_match_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, gro_large_hds) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 139e59f30db0..2c1ed5b81be6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -144,6 +144,11 @@ struct mlx5e_sw_stats { u64 tx_mpwqe_pkts; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_gro_packets; + u64 rx_gro_bytes; + u64 rx_gro_skbs; + u64 rx_gro_match_packets; + u64 rx_gro_large_hds; u64 rx_mcast_packets; u64 rx_ecn_mark; u64 rx_removed_vlan_packets; @@ -322,6 +327,11 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 gro_packets; + u64 gro_bytes; + u64 gro_skbs; + u64 gro_match_packets; + u64 gro_large_hds; u64 mcast_packets; u64 ecn_mark; u64 removed_vlan_packets; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 1037e3629e7e..2d8406fab844 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -269,6 +269,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, shampo)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 3b8d8ada1a01..84297cc1b509 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -67,7 +67,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; - params->lro_en = false; + params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; params->tunneled_offload_en = false; } @@ -356,7 +356,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_lro_param lro_param; int err; priv->rx_res = mlx5e_rx_res_alloc(); @@ -371,9 +370,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - lro_param = mlx5e_get_lro_param(&priv->channels.params); err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, - priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->max_nch, priv->drop_rq.rqn, + &priv->channels.params.packet_merge, priv->channels.params.num_channels); if (err) goto err_close_drop_rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index dea199e79bed..57af962cad29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -31,7 +31,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {}; static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; } static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {} -bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; } +static inline bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; } #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_LAG_MP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f8446395163a..a92a92a52346 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1417,6 +1417,7 @@ static const int types[] = { MLX5_CAP_VDPA_EMULATION, MLX5_CAP_IPSEC, MLX5_CAP_PORT_SELECTION, + MLX5_CAP_DEV_SHAMPO, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 37f36dad18bd..a241dcf50f39 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -123,6 +123,8 @@ struct device; */ unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); +unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); void bitmap_free(const unsigned long *bitmap); /* Managed variants of the above. */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f8a0bbb42c3b..56bcf95d4ab7 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -290,6 +290,7 @@ enum { MLX5_UMR_INLINE = (1 << 7), }; +#define MLX5_UMR_KLM_ALIGNMENT 4 #define MLX5_UMR_MTT_ALIGNMENT 0x40 #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT @@ -799,10 +800,23 @@ struct mlx5_cqe64 { u8 tls_outer_l3_tunneled; u8 rsvd0; __be16 wqe_id; - u8 lro_tcppsh_abort_dupack; - u8 lro_min_ttl; - __be16 lro_tcp_win; - __be32 lro_ack_seq_num; + union { + struct { + u8 tcppsh_abort_dupack; + u8 min_ttl; + __be16 tcp_win; + __be32 ack_seq_num; + } lro; + struct { + u8 reserved0:1; + u8 match:1; + u8 flush:1; + u8 reserved3:5; + u8 header_size; + __be16 header_entry_index; + __be32 data_offset; + } shampo; + }; __be32 rss_hash_result; u8 rss_hash_type; u8 ml_path; @@ -872,7 +886,7 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { - return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; + return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; } static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) @@ -1186,6 +1200,7 @@ enum mlx5_cap_type { MLX5_CAP_VDPA_EMULATION = 0x13, MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, + MLX5_CAP_DEV_SHAMPO = 0x1d, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, /* NUM OF CAP Types */ @@ -1431,6 +1446,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_IPSEC(mdev, cap)\ MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap) +#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\ + MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 746381eccccf..0bb78c04336c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1350,7 +1350,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_b0[0x1]; u8 uplink_follow[0x1]; u8 ts_cqe_to_dest_cqn[0x1]; - u8 reserved_at_b3[0xd]; + u8 reserved_at_b3[0x7]; + u8 shampo[0x1]; + u8 reserved_at_bb[0x5]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -1893,7 +1895,21 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_139[0x4]; u8 log_wqe_stride_size[0x3]; - u8 reserved_at_140[0x4c0]; + u8 reserved_at_140[0x80]; + + u8 headers_mkey[0x20]; + + u8 shampo_enable[0x1]; + u8 reserved_at_1e1[0x4]; + u8 log_reservation_size[0x3]; + u8 reserved_at_1e8[0x5]; + u8 log_max_num_of_packets_per_reservation[0x3]; + u8 reserved_at_1f0[0x6]; + u8 log_headers_entry_size[0x2]; + u8 reserved_at_1f8[0x4]; + u8 log_headers_buffer_entry_num[0x4]; + + u8 reserved_at_200[0x400]; struct mlx5_ifc_cmd_pas_bits pas[]; }; @@ -3169,6 +3185,20 @@ struct mlx5_ifc_roce_addr_layout_bits { u8 reserved_at_e0[0x20]; }; +struct mlx5_ifc_shampo_cap_bits { + u8 reserved_at_0[0x3]; + u8 shampo_log_max_reservation_size[0x5]; + u8 reserved_at_8[0x3]; + u8 shampo_log_min_reservation_size[0x5]; + u8 shampo_min_mss_size[0x10]; + + u8 reserved_at_20[0x3]; + u8 shampo_max_log_headers_entry_size[0x5]; + u8 reserved_at_28[0x18]; + + u8 reserved_at_40[0x7c0]; +}; + union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2; @@ -3187,6 +3217,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_tls_cap_bits tls_cap; struct mlx5_ifc_device_mem_cap_bits device_mem_cap; struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap; + struct mlx5_ifc_shampo_cap_bits shampo_cap; u8 reserved_at_0[0x8000]; }; @@ -3361,8 +3392,9 @@ enum { }; enum { - MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, - MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, + MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0), + MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1), + MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO = BIT(2), }; enum { @@ -3387,7 +3419,7 @@ struct mlx5_ifc_tirc_bits { u8 reserved_at_80[0x4]; u8 lro_timeout_period_usecs[0x10]; - u8 lro_enable_mask[0x4]; + u8 packet_merge_mask[0x4]; u8 lro_max_ip_payload_size[0x8]; u8 reserved_at_a0[0x40]; @@ -3569,6 +3601,18 @@ enum { MLX5_RQC_STATE_ERR = 0x3, }; +enum { + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_BYTE = 0x0, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE = 0x1, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_PAGE = 0x2, +}; + +enum { + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_NO_MATCH = 0x0, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED = 0x1, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_FIVE_TUPLE = 0x2, +}; + struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; u8 delay_drop_en[0x1]; @@ -3601,7 +3645,13 @@ struct mlx5_ifc_rqc_bits { u8 reserved_at_c0[0x10]; u8 hairpin_peer_vhca[0x10]; - u8 reserved_at_e0[0xa0]; + u8 reserved_at_e0[0x46]; + u8 shampo_no_match_alignment_granularity[0x2]; + u8 reserved_at_128[0x6]; + u8 shampo_match_criteria_type[0x2]; + u8 reservation_timeout[0x10]; + + u8 reserved_at_140[0x40]; struct mlx5_ifc_wq_bits wq; }; @@ -6657,7 +6707,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits { u8 reserved_at_3c[0x1]; u8 hash[0x1]; u8 reserved_at_3e[0x1]; - u8 lro[0x1]; + u8 packet_merge[0x1]; }; struct mlx5_ifc_modify_tir_out_bits { diff --git a/lib/bitmap.c b/lib/bitmap.c index 663dd81967d4..926408883456 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1398,6 +1398,19 @@ unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags) } EXPORT_SYMBOL(bitmap_zalloc); +unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node) +{ + return kmalloc_array_node(BITS_TO_LONGS(nbits), sizeof(unsigned long), + flags, node); +} +EXPORT_SYMBOL(bitmap_alloc_node); + +unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node) +{ + return bitmap_alloc_node(nbits, flags | __GFP_ZERO, node); +} +EXPORT_SYMBOL(bitmap_zalloc_node); + void bitmap_free(const unsigned long *bitmap) { kfree(bitmap); diff --git a/net/core/dev.c b/net/core/dev.c index 4e3d19a06de4..e8754560e641 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9922,6 +9922,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) { + netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n"); + features &= ~NETIF_F_LRO; + } + if (features & NETIF_F_HW_TLS_TX) { bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); |