summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c315
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h13
10 files changed, 381 insertions, 1 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
index 43d72c8b713b..754c81436408 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
@@ -1341,3 +1341,35 @@ Device Counters
- The number of times the device owned queue had not enough buffers
allocated.
- Error
+
+ * - `pci_bw_inbound_high`
+ - The number of times the device crossed the high inbound pcie bandwidth
+ threshold. To be compared to pci_bw_inbound_low to check if the device
+ is in a congested state.
+ If pci_bw_inbound_high == pci_bw_inbound_low then the device is not congested.
+ If pci_bw_inbound_high > pci_bw_inbound_low then the device is congested.
+ - Tnformative
+
+ * - `pci_bw_inbound_low`
+ - The number of times the device crossed the low inbound PCIe bandwidth
+ threshold. To be compared to pci_bw_inbound_high to check if the device
+ is in a congested state.
+ If pci_bw_inbound_high == pci_bw_inbound_low then the device is not congested.
+ If pci_bw_inbound_high > pci_bw_inbound_low then the device is congested.
+ - Informative
+
+ * - `pci_bw_outbound_high`
+ - The number of times the device crossed the high outbound pcie bandwidth
+ threshold. To be compared to pci_bw_outbound_low to check if the device
+ is in a congested state.
+ If pci_bw_outbound_high == pci_bw_outbound_low then the device is not congested.
+ If pci_bw_outbound_high > pci_bw_outbound_low then the device is congested.
+ - Informative
+
+ * - `pci_bw_outbound_low`
+ - The number of times the device crossed the low outbound PCIe bandwidth
+ threshold. To be compared to pci_bw_outbound_high to check if the device
+ is in a congested state.
+ If pci_bw_outbound_high == pci_bw_outbound_low then the device is not congested.
+ If pci_bw_outbound_high > pci_bw_outbound_low then the device is congested.
+ - Informative
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d292e6a9e22c..650df18a9216 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -29,7 +29,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \
- lib/crypto.o lib/sd.o
+ lib/crypto.o lib/sd.o en/pcie_cong_event.o
#
# Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 64e69e616b1f..b6340e9453c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -920,6 +920,8 @@ struct mlx5e_priv {
struct notifier_block events_nb;
struct notifier_block blocking_events_nb;
+ struct mlx5e_pcie_cong_event *cong_event;
+
struct udp_tunnel_nic_info nic_info;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
new file mode 100644
index 000000000000..0ed017569a19
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+
+#include "en.h"
+#include "pcie_cong_event.h"
+
+#define MLX5E_CONG_HIGH_STATE 0x7
+
+enum {
+ MLX5E_INBOUND_CONG = BIT(0),
+ MLX5E_OUTBOUND_CONG = BIT(1),
+};
+
+struct mlx5e_pcie_cong_thresh {
+ u16 inbound_high;
+ u16 inbound_low;
+ u16 outbound_high;
+ u16 outbound_low;
+};
+
+struct mlx5e_pcie_cong_stats {
+ u32 pci_bw_inbound_high;
+ u32 pci_bw_inbound_low;
+ u32 pci_bw_outbound_high;
+ u32 pci_bw_outbound_low;
+};
+
+struct mlx5e_pcie_cong_event {
+ u64 obj_id;
+
+ struct mlx5e_priv *priv;
+
+ /* For event notifier and workqueue. */
+ struct work_struct work;
+ struct mlx5_nb nb;
+
+ /* Stores last read state. */
+ u8 state;
+
+ /* For ethtool stats group. */
+ struct mlx5e_pcie_cong_stats stats;
+};
+
+/* In units of 0.01 % */
+static const struct mlx5e_pcie_cong_thresh default_thresh_config = {
+ .inbound_high = 9000,
+ .inbound_low = 7500,
+ .outbound_high = 9000,
+ .outbound_low = 7500,
+};
+
+static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
+ pci_bw_inbound_high) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
+ pci_bw_inbound_low) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
+ pci_bw_outbound_high) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
+ pci_bw_outbound_low) },
+};
+
+#define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie_cong)
+{
+ return priv->cong_event ? NUM_PCIE_CONG_COUNTERS : 0;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie_cong) {}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie_cong)
+{
+ if (!priv->cong_event)
+ return;
+
+ for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++)
+ ethtool_puts(data, mlx5e_pcie_cong_stats_desc[i].format);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie_cong)
+{
+ if (!priv->cong_event)
+ return;
+
+ for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) {
+ u32 ctr = MLX5E_READ_CTR32_CPU(&priv->cong_event->stats,
+ mlx5e_pcie_cong_stats_desc,
+ i);
+
+ mlx5e_ethtool_put_stat(data, ctr);
+ }
+}
+
+MLX5E_DEFINE_STATS_GRP(pcie_cong, 0);
+
+static int
+mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
+ const struct mlx5e_pcie_cong_thresh *config,
+ u64 *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ void *cong_obj;
+ void *hdr;
+ int err;
+
+ hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
+ cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj);
+
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
+
+ MLX5_SET(pcie_cong_event_obj, cong_obj, inbound_event_en, 1);
+ MLX5_SET(pcie_cong_event_obj, cong_obj, outbound_event_en, 1);
+
+ MLX5_SET(pcie_cong_event_obj, cong_obj,
+ inbound_cong_high_threshold, config->inbound_high);
+ MLX5_SET(pcie_cong_event_obj, cong_obj,
+ inbound_cong_low_threshold, config->inbound_low);
+
+ MLX5_SET(pcie_cong_event_obj, cong_obj,
+ outbound_cong_high_threshold, config->outbound_high);
+ MLX5_SET(pcie_cong_event_obj, cong_obj,
+ outbound_cong_low_threshold, config->outbound_low);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+ mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n",
+ *obj_id,
+ config->inbound_high, config->inbound_low,
+ config->outbound_high, config->outbound_low);
+
+ return 0;
+}
+
+static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev,
+ u64 obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ void *hdr;
+
+ hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
+ MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
+ u64 obj_id,
+ u32 *state)
+{
+ u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)];
+ void *obj;
+ void *hdr;
+ u8 cong;
+ int err;
+
+ hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
+
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
+ MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj);
+
+ if (state) {
+ cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state);
+ if (cong == MLX5E_CONG_HIGH_STATE)
+ *state |= MLX5E_INBOUND_CONG;
+
+ cong = MLX5_GET(pcie_cong_event_obj, obj, outbound_cong_state);
+ if (cong == MLX5E_CONG_HIGH_STATE)
+ *state |= MLX5E_OUTBOUND_CONG;
+ }
+
+ return 0;
+}
+
+static void mlx5e_pcie_cong_event_work(struct work_struct *work)
+{
+ struct mlx5e_pcie_cong_event *cong_event;
+ struct mlx5_core_dev *dev;
+ struct mlx5e_priv *priv;
+ u32 new_cong_state = 0;
+ u32 changes;
+ int err;
+
+ cong_event = container_of(work, struct mlx5e_pcie_cong_event, work);
+ priv = cong_event->priv;
+ dev = priv->mdev;
+
+ err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id,
+ &new_cong_state);
+ if (err) {
+ mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n",
+ err, cong_event->obj_id);
+ return;
+ }
+
+ changes = cong_event->state ^ new_cong_state;
+ if (!changes)
+ return;
+
+ cong_event->state = new_cong_state;
+
+ if (changes & MLX5E_INBOUND_CONG) {
+ if (new_cong_state & MLX5E_INBOUND_CONG)
+ cong_event->stats.pci_bw_inbound_high++;
+ else
+ cong_event->stats.pci_bw_inbound_low++;
+ }
+
+ if (changes & MLX5E_OUTBOUND_CONG) {
+ if (new_cong_state & MLX5E_OUTBOUND_CONG)
+ cong_event->stats.pci_bw_outbound_high++;
+ else
+ cong_event->stats.pci_bw_outbound_low++;
+ }
+}
+
+static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb,
+ unsigned long event, void *eqe)
+{
+ struct mlx5e_pcie_cong_event *cong_event;
+
+ cong_event = mlx5_nb_cof(nb, struct mlx5e_pcie_cong_event, nb);
+ queue_work(cong_event->priv->wq, &cong_event->work);
+
+ return NOTIFY_OK;
+}
+
+int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pcie_cong_event *cong_event;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!mlx5_pcie_cong_event_supported(mdev))
+ return 0;
+
+ cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL,
+ mdev->priv.numa_node);
+ if (!cong_event)
+ return -ENOMEM;
+
+ INIT_WORK(&cong_event->work, mlx5e_pcie_cong_event_work);
+ MLX5_NB_INIT(&cong_event->nb, mlx5e_pcie_cong_event_handler,
+ OBJECT_CHANGE);
+
+ cong_event->priv = priv;
+
+ err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config,
+ &cong_event->obj_id);
+ if (err) {
+ mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");
+ goto err_free;
+ }
+
+ err = mlx5_eq_notifier_register(mdev, &cong_event->nb);
+ if (err) {
+ mlx5_core_warn(mdev, "Error registering notifier for the PCIe congestion event\n");
+ goto err_obj_destroy;
+ }
+
+ priv->cong_event = cong_event;
+
+ return 0;
+
+err_obj_destroy:
+ mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id);
+err_free:
+ kvfree(cong_event);
+
+ return err;
+}
+
+void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pcie_cong_event *cong_event = priv->cong_event;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!cong_event)
+ return;
+
+ priv->cong_event = NULL;
+
+ mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
+ cancel_work_sync(&cong_event->work);
+
+ if (mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id))
+ mlx5_core_warn(mdev, "Error destroying PCIe congestion event (obj_id=%llu)\n",
+ cong_event->obj_id);
+
+ kvfree(cong_event);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h
new file mode 100644
index 000000000000..b1ea46bf648a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_PCIE_CONG_EVENT_H__
+#define __MLX5_PCIE_CONG_EVENT_H__
+
+int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv);
+void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_PCIE_CONG_EVENT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index fee323ade522..bd481f3384d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -76,6 +76,7 @@
#include "en/trap.h"
#include "lib/devcom.h"
#include "lib/sd.h"
+#include "en/pcie_cong_event.h"
static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev)
{
@@ -5989,6 +5990,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_init(priv);
+ mlx5e_pcie_cong_event_init(priv);
mlx5e_hv_vhca_stats_create(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
@@ -6028,6 +6030,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5e_nic_set_rx_mode(priv);
+ mlx5e_pcie_cong_event_cleanup(priv);
mlx5e_hv_vhca_stats_destroy(priv);
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_cleanup(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 19664fa7f217..87536f158d07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -2612,6 +2612,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
#ifdef CONFIG_MLX5_MACSEC
&MLX5E_STATS_GRP(macsec_hw),
#endif
+ &MLX5E_STATS_GRP(pcie_cong),
};
unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index def5dea1463d..72dbcc1928ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -535,5 +535,6 @@ extern MLX5E_DECLARE_STATS_GRP(ipsec_hw);
extern MLX5E_DECLARE_STATS_GRP(ipsec_sw);
extern MLX5E_DECLARE_STATS_GRP(ptp);
extern MLX5E_DECLARE_STATS_GRP(macsec_hw);
+extern MLX5E_DECLARE_STATS_GRP(pcie_cong);
#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index dfb079e59d85..66dce17219a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -585,6 +585,9 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
async_event_mask |=
(1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE);
+ if (mlx5_pcie_cong_event_supported(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE);
+
mask[0] = async_event_mask;
if (MLX5_CAP_GEN(dev, event_cap))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 2e02bdea8361..c518380c4ce7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -495,4 +495,17 @@ static inline int mlx5_max_eq_cap_get(const struct mlx5_core_dev *dev)
return 1 << MLX5_CAP_GEN(dev, log_max_eq);
}
+
+static inline bool mlx5_pcie_cong_event_supported(struct mlx5_core_dev *dev)
+{
+ u64 features = MLX5_CAP_GEN_2_64(dev, general_obj_types_127_64);
+
+ if (!(features & MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT))
+ return false;
+
+ if (dev->sd)
+ return false;
+
+ return true;
+}
#endif /* __MLX5_CORE_H__ */