summaryrefslogtreecommitdiff
path: root/drivers/edac/skx_common.c
diff options
context:
space:
mode:
authorQiuxu Zhuo <qiuxu.zhuo@intel.com>2025-02-14 08:27:28 +0800
committerTony Luck <tony.luck@intel.com>2025-02-20 17:02:33 -0800
commitd9207cf7760f5f5599e9ff7eb0fedf56821a1d59 (patch)
tree5000a31dde72b4f106a53a25aad5e35373bb68f7 /drivers/edac/skx_common.c
parent267e5b1d267539d9a927dc04aab6f15aca57da92 (diff)
EDAC/{skx_common,i10nm}: Fix some missing error reports on Emerald Rapids
When doing error injection to some memory DIMMs on certain Intel Emerald Rapids servers, the i10nm_edac missed error reports for some memory DIMMs. Certain BIOS configurations may hide some memory controllers, and the i10nm_edac doesn't enumerate these hidden memory controllers. However, the ADXL decodes memory errors using memory controller physical indices even if there are hidden memory controllers. Therefore, the memory controller physical indices reported by the ADXL may mismatch the logical indices enumerated by the i10nm_edac, resulting in missed error reports for some memory DIMMs. Fix this issue by creating a mapping table from memory controller physical indices (used by the ADXL) to logical indices (used by the i10nm_edac) and using it to convert the physical indices to the logical indices during the error handling process. Fixes: c545f5e41225 ("EDAC/i10nm: Skip the absent memory controllers") Reported-by: Kevin Chang <kevin1.chang@intel.com> Tested-by: Kevin Chang <kevin1.chang@intel.com> Reported-by: Thomas Chen <Thomas.Chen@intel.com> Tested-by: Thomas Chen <Thomas.Chen@intel.com> Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> Link: https://lore.kernel.org/r/20250214002728.6287-1-qiuxu.zhuo@intel.com
Diffstat (limited to 'drivers/edac/skx_common.c')
-rw-r--r--drivers/edac/skx_common.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index f7bd930e058fe..fa5b442b18449 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -121,6 +121,35 @@ void skx_adxl_put(void)
}
EXPORT_SYMBOL_GPL(skx_adxl_put);
+static void skx_init_mc_mapping(struct skx_dev *d)
+{
+ /*
+ * By default, the BIOS presents all memory controllers within each
+ * socket to the EDAC driver. The physical indices are the same as
+ * the logical indices of the memory controllers enumerated by the
+ * EDAC driver.
+ */
+ for (int i = 0; i < NUM_IMC; i++)
+ d->mc_mapping[i] = i;
+}
+
+void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
+{
+ edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n",
+ pmc, lmc);
+
+ d->mc_mapping[pmc] = lmc;
+}
+EXPORT_SYMBOL_GPL(skx_set_mc_mapping);
+
+static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
+{
+ edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
+ pmc, d->mc_mapping[pmc]);
+
+ return d->mc_mapping[pmc];
+}
+
static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
{
struct skx_dev *d;
@@ -188,6 +217,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
return false;
}
+ res->imc = skx_get_mc_mapping(d, res->imc);
+
for (i = 0; i < adxl_component_count; i++) {
if (adxl_values[i] == ~0x0ull)
continue;
@@ -326,6 +357,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
list_add_tail(&d->list, &dev_edac_list);
prev = pdev;
+
+ skx_init_mc_mapping(d);
}
if (list)