diff options
Diffstat (limited to 'drivers/accel/habanalabs/common')
-rw-r--r-- | drivers/accel/habanalabs/common/Makefile | 5 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/debugfs.c | 324 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/device.c | 23 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/habanalabs.h | 56 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/habanalabs_ioctl.c | 6 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/hldio.c | 437 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/hldio.h | 146 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/memory.c | 9 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/memory_mgr.c | 5 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/sysfs.c | 11 |
10 files changed, 1010 insertions, 12 deletions
diff --git a/drivers/accel/habanalabs/common/Makefile b/drivers/accel/habanalabs/common/Makefile index e6abffea9f87..b6d00de09db5 100644 --- a/drivers/accel/habanalabs/common/Makefile +++ b/drivers/accel/habanalabs/common/Makefile @@ -13,3 +13,8 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/command_submission.o common/firmware_if.o \ common/security.o common/state_dump.o \ common/memory_mgr.o common/decoder.o + +# Conditionally add HLDIO support +ifdef CONFIG_HL_HLDIO +HL_COMMON_FILES += common/hldio.o +endif
\ No newline at end of file diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index 4b391807e5f2..5f0820b19ccb 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -6,6 +6,7 @@ */ #include "habanalabs.h" +#include "hldio.h" #include "../include/hw_ip/mmu/mmu_general.h" #include <linux/pci.h> @@ -602,6 +603,198 @@ static int engines_show(struct seq_file *s, void *data) return 0; } +#ifdef CONFIG_HL_HLDIO +/* DIO debugfs functions following the standard pattern */ +static int dio_ssd2hl_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (!hdev->asic_prop.supports_nvme) { + seq_puts(s, "NVMe Direct I/O not supported\\n"); + return 0; + } + + seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n"); + seq_printf(s, "Last transfer: %zu bytes\\n", dev_entry->dio_stats.last_len_read); + seq_puts(s, "Note: All parameters must be page-aligned (4KB)\\n"); + + return 0; +} + +static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct hl_ctx *ctx = hdev->kernel_ctx; + char kbuf[128]; + u64 device_va = 0, off_bytes = 0, len_bytes = 0; + u32 fd = 0; + size_t len_read = 0; + int rc, parsed; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + if (count >= sizeof(kbuf)) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count] = 0; + + /* Parse: fd=N va=0xADDR off=N len=N */ + parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu", + &fd, &device_va, &off_bytes, &len_bytes); + if (parsed != 4) { + dev_err(hdev->dev, 
"Invalid format. Expected: fd=N va=0xADDR off=N len=N\\n"); + return -EINVAL; + } + + /* Validate file descriptor */ + if (fd == 0) { + dev_err(hdev->dev, "Invalid file descriptor: %u\\n", fd); + return -EINVAL; + } + + /* Validate alignment requirements */ + if (!IS_ALIGNED(device_va, PAGE_SIZE) || + !IS_ALIGNED(off_bytes, PAGE_SIZE) || + !IS_ALIGNED(len_bytes, PAGE_SIZE)) { + dev_err(hdev->dev, + "All parameters must be page-aligned (4KB)\\n"); + return -EINVAL; + } + + /* Validate transfer size */ + if (len_bytes == 0 || len_bytes > SZ_1G) { + dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\\n", + len_bytes); + return -EINVAL; + } + + dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\\n", + fd, device_va, off_bytes, len_bytes); + + rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read); + if (rc < 0) { + dev_entry->dio_stats.failed_ops++; + dev_err(hdev->dev, "SSD2HL operation failed: %d\\n", rc); + return rc; + } + + /* Update statistics */ + dev_entry->dio_stats.total_ops++; + dev_entry->dio_stats.successful_ops++; + dev_entry->dio_stats.bytes_transferred += len_read; + dev_entry->dio_stats.last_len_read = len_read; + + dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\\n", len_read); + + return count; +} + +static int dio_hl2ssd_show(struct seq_file *s, void *data) +{ + seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\\n"); + return 0; +} + +static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + dev_dbg(hdev->dev, "HL2SSD operation not implemented\\n"); + return -EOPNOTSUPP; +} + +static int dio_stats_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = 
s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct hl_dio_stats *stats = &dev_entry->dio_stats; + u64 avg_bytes_per_op = 0, success_rate = 0; + + if (!hdev->asic_prop.supports_nvme) { + seq_puts(s, "NVMe Direct I/O not supported\\n"); + return 0; + } + + if (stats->successful_ops > 0) + avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops; + + if (stats->total_ops > 0) + success_rate = (stats->successful_ops * 100) / stats->total_ops; + + seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\\n"); + seq_printf(s, "Total operations: %llu\\n", stats->total_ops); + seq_printf(s, "Successful ops: %llu\\n", stats->successful_ops); + seq_printf(s, "Failed ops: %llu\\n", stats->failed_ops); + seq_printf(s, "Success rate: %llu%%\\n", success_rate); + seq_printf(s, "Total bytes: %llu\\n", stats->bytes_transferred); + seq_printf(s, "Avg bytes per op: %llu\\n", avg_bytes_per_op); + seq_printf(s, "Last transfer: %zu bytes\\n", stats->last_len_read); + + return 0; +} + +static int dio_reset_show(struct seq_file *s, void *data) +{ + seq_puts(s, "Write '1' to reset DIO statistics\\n"); + return 0; +} + +static ssize_t dio_reset_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + char kbuf[8]; + unsigned long val; + int rc; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + if (count >= sizeof(kbuf)) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count] = 0; + + rc = kstrtoul(kbuf, 0, &val); + if (rc) + return rc; + + if (val == 1) { + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); + dev_dbg(hdev->dev, "DIO statistics reset\\n"); + } else { + dev_err(hdev->dev, "Write '1' to reset statistics\\n"); + 
return -EINVAL; + } + + return count; +} +#endif + static ssize_t hl_memory_scrub(struct file *f, const char __user *buf, size_t count, loff_t *ppos) { @@ -788,6 +981,113 @@ static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val, } } +static void dump_cfg_access_entry(struct hl_device *hdev, + struct hl_debugfs_cfg_access_entry *entry) +{ + char *access_type = ""; + struct tm tm; + + switch (entry->debugfs_type) { + case DEBUGFS_READ32: + access_type = "READ32 from"; + break; + case DEBUGFS_WRITE32: + access_type = "WRITE32 to"; + break; + case DEBUGFS_READ64: + access_type = "READ64 from"; + break; + case DEBUGFS_WRITE64: + access_type = "WRITE64 to"; + break; + default: + dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type); + return; + } + + time64_to_tm(entry->seconds_since_epoch, 0, &tm); + dev_info(hdev->dev, + "%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1, + tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr); +} + +void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) +{ + struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses; + u32 i, head, count = 0; + time64_t entry_time, now; + unsigned long flags; + + now = ktime_get_real_seconds(); + + spin_lock_irqsave(&dbgfs->lock, flags); + head = dbgfs->head; + if (head == 0) + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; + else + i = head - 1; + + /* Walk back until timeout or invalid entry */ + while (dbgfs->cfg_access_list[i].valid) { + entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch; + /* Stop when entry is older than timeout */ + if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC) + break; + + /* print single entry under lock */ + { + struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i]; + /* + * We copy the entry out under lock and then print after + * releasing the lock to minimize time under lock. 
+ */ + spin_unlock_irqrestore(&dbgfs->lock, flags); + dump_cfg_access_entry(hdev, &entry); + spin_lock_irqsave(&dbgfs->lock, flags); + } + + /* mark consumed */ + dbgfs->cfg_access_list[i].valid = false; + + if (i == 0) + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; + else + i--; + count++; + if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN) + break; + } + spin_unlock_irqrestore(&dbgfs->lock, flags); +} + +static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size, + enum debugfs_access_type access_type) +{ + struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses; + struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG]; + struct hl_debugfs_cfg_access_entry *new_entry; + unsigned long flags; + + /* Check if address is in config memory */ + if (addr >= mem_reg->region_base && + mem_reg->region_size >= access_size && + addr <= mem_reg->region_base + mem_reg->region_size - access_size) { + + spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags); + + new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head]; + new_entry->seconds_since_epoch = ktime_get_real_seconds(); + new_entry->addr = addr; + new_entry->debugfs_type = access_type; + new_entry->valid = true; + dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1) + % HL_DBGFS_CFG_ACCESS_HIST_LEN; + + spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags); + + } +} + static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type) { @@ -805,6 +1105,7 @@ static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, return rc; } + check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type); rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found); if (rc) { dev_err(hdev->dev, @@ -1525,6 +1826,13 @@ static const struct hl_info_list hl_debugfs_list[] = { {"mmu", mmu_show, mmu_asid_va_write}, {"mmu_error", mmu_ack_error, mmu_ack_error_value_write}, {"engines", engines_show, NULL}, 
+#ifdef CONFIG_HL_HLDIO + /* DIO entries - only created if NVMe is supported */ + {"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write}, + {"dio_stats", dio_stats_show, NULL}, + {"dio_reset", dio_reset_show, dio_reset_write}, + {"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write}, +#endif }; static int hl_debugfs_open(struct inode *inode, struct file *file) @@ -1723,6 +2031,11 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent &hdev->asic_prop.server_type); for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { + /* Skip DIO entries if NVMe is not supported */ + if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 && + !hdev->asic_prop.supports_nvme) + continue; + debugfs_create_file(hl_debugfs_list[i].name, 0644, root, @@ -1762,6 +2075,14 @@ int hl_debugfs_device_init(struct hl_device *hdev) spin_lock_init(&dev_entry->userptr_spinlock); mutex_init(&dev_entry->ctx_mem_hash_mutex); + spin_lock_init(&hdev->debugfs_cfg_accesses.lock); + hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */ + +#ifdef CONFIG_HL_HLDIO + /* Initialize DIO statistics */ + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); +#endif + return 0; } @@ -1780,6 +2101,7 @@ void hl_debugfs_device_fini(struct hl_device *hdev) vfree(entry->state_dump[i]); kfree(entry->entry_arr); + } void hl_debugfs_add_device(struct hl_device *hdev) @@ -1792,6 +2114,7 @@ void hl_debugfs_add_device(struct hl_device *hdev) if (!hdev->asic_prop.fw_security_enabled) add_secured_nodes(dev_entry, dev_entry->root); + } void hl_debugfs_add_file(struct hl_fpriv *hpriv) @@ -1924,3 +2247,4 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, up_write(&dev_entry->state_dump_sem); } + diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 80fa08bf57bd..999c92d7036e 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -1630,6 
+1630,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release; + if (hdev->cpld_shutdown) { + dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! Device is NOT usable\n"); + return -EIO; + } + if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); return 0; @@ -2576,6 +2581,14 @@ void hl_device_fini(struct hl_device *hdev) if (rc) dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); + /* Reset the H/W (if it accessible). It will be in idle state after this returns */ + if (!hdev->cpld_shutdown) { + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) + dev_err(hdev->dev, + "hw_fini failed in device fini while removing device %d\n", rc); + } + hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; /* Release kernel context */ @@ -2943,3 +2956,13 @@ void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *eve mutex_unlock(&clk_throttle->lock); } + +void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask) +{ + hl_handle_critical_hw_err(hdev, event_id, event_mask); + *event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; + + /* Avoid any new accesses to the H/W */ + hdev->disabled = true; + hdev->cpld_shutdown = true; +} diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 6f27ce4fa01b..d94c2ba22a6a 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -90,7 +90,9 @@ struct hl_fpriv; #define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF #define HL_COMMON_DEC_INTERRUPT_ID 0xFFE -#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_DBGFS_CFG_ACCESS_HIST_LEN 20 +#define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC 
2 /* 2s */ /* Default value for device reset trigger , an invalid value */ #define HL_RESET_TRIGGER_DEFAULT 0xFF @@ -702,6 +704,7 @@ struct hl_hints_range { * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. * @supports_engine_modes: true if changing engines/engine_cores modes is supported. * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw. + * @supports_nvme: indicates whether the asic supports NVMe P2P DMA. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -822,6 +825,7 @@ struct asic_fixed_properties { u8 supports_advanced_cpucp_rc; u8 supports_engine_modes; u8 support_dynamic_resereved_fw_size; + u8 supports_nvme; }; /** @@ -2274,6 +2278,9 @@ struct hl_vm { u8 init_done; }; +#ifdef CONFIG_HL_HLDIO +#include "hldio.h" +#endif /* * DEBUG, PROFILING STRUCTURE @@ -2344,7 +2351,6 @@ struct hl_fpriv { struct mutex ctx_lock; }; - /* * DebugFS */ @@ -2372,6 +2378,7 @@ struct hl_debugfs_entry { struct hl_dbg_device_entry *dev_entry; }; + /** * struct hl_dbg_device_entry - ASIC specific debugfs manager. * @root: root dentry. @@ -2403,6 +2410,7 @@ struct hl_debugfs_entry { * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. * @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read. + * @dio_stats: Direct I/O statistics */ struct hl_dbg_device_entry { struct dentry *root; @@ -2434,6 +2442,35 @@ struct hl_dbg_device_entry { u8 i2c_addr; u8 i2c_reg; u8 i2c_len; +#ifdef CONFIG_HL_HLDIO + struct hl_dio_stats dio_stats; +#endif +}; + +/** + * struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of + * hl_debugfs_cfg_access. + * @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons. + * @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64. 
+ * @addr: the requested address to access. + * @valid: if set, this entry has valid data for dumping at interrupt time. + */ +struct hl_debugfs_cfg_access_entry { + ktime_t seconds_since_epoch; + enum debugfs_access_type debugfs_type; + u64 addr; + bool valid; +}; + +/** + * struct hl_debugfs_cfg_access - saves debugfs config region access requests history. + * @cfg_access_list: list of objects describing config region access requests. + * @head: next valid index to add new entry to in cfg_access_list. + */ +struct hl_debugfs_cfg_access { + struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN]; + u32 head; + spinlock_t lock; /* protects head and entries */ }; /** @@ -3281,6 +3318,7 @@ struct eq_heartbeat_debug_info { * @hl_chip_info: ASIC's sensors information. * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. + * @debugfs_cfg_accesses: list of last debugfs config region accesses. * @cb_pool: list of pre allocated CBs. * @cb_pool_lock: protects the CB pool. * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. @@ -3305,6 +3343,7 @@ struct eq_heartbeat_debug_info { * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. * @heartbeat_debug_info: counters used to debug heartbeat failures. + * @hldio: describes habanalabs direct storage interaction interface. * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling. * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @fw_inner_major_ver: the major of current loaded preboot inner version. @@ -3357,6 +3396,7 @@ struct eq_heartbeat_debug_info { * addresses. * @is_in_dram_scrub: true if dram scrub operation is on going. * @disabled: is device disabled. + * @cpld_shutdown: is cpld shutdown. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. 
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false @@ -3461,6 +3501,7 @@ struct hl_device { struct hwmon_chip_info *hl_chip_info; struct hl_dbg_device_entry hl_debugfs; + struct hl_debugfs_cfg_access debugfs_cfg_accesses; struct list_head cb_pool; spinlock_t cb_pool_lock; @@ -3496,7 +3537,9 @@ struct hl_device { struct hl_reset_info reset_info; struct eq_heartbeat_debug_info heartbeat_debug_info; - +#ifdef CONFIG_HL_HLDIO + struct hl_dio hldio; +#endif cpumask_t irq_affinity_mask; u32 *stream_master_qid_arr; @@ -3532,6 +3575,7 @@ struct hl_device { u16 cpu_pci_msb_addr; u8 is_in_dram_scrub; u8 disabled; + u8 cpld_shutdown; u8 late_init_done; u8 hwmon_initialized; u8 reset_on_lockup; @@ -4089,6 +4133,7 @@ void hl_init_cpu_for_irq(struct hl_device *hdev); void hl_set_irq_affinity(struct hl_device *hdev, int irq); void hl_eq_heartbeat_event_handle(struct hl_device *hdev); void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask); +void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask); #ifdef CONFIG_DEBUG_FS @@ -4110,6 +4155,7 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, unsigned long length); +void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev); #else @@ -4185,6 +4231,10 @@ static inline void hl_debugfs_set_state_dump(struct hl_device *hdev, { } +static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) +{ +} + #endif /* Security */ diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index dc80ca921d90..fdfdabc85e54 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -961,6 +961,12 @@ static int send_fw_generic_request(struct 
hl_device *hdev, struct hl_info_args * case HL_PASSTHROUGH_VERSIONS: need_input_buff = false; break; + case HL_GET_ERR_COUNTERS_CMD: + need_input_buff = true; + break; + case HL_GET_P_STATE: + need_input_buff = false; + break; default: return -EINVAL; } diff --git a/drivers/accel/habanalabs/common/hldio.c b/drivers/accel/habanalabs/common/hldio.c new file mode 100644 index 000000000000..083ae5610875 --- /dev/null +++ b/drivers/accel/habanalabs/common/hldio.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2024 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "hldio.h" +#include <generated/uapi/linux/version.h> +#include <linux/pci-p2pdma.h> +#include <linux/blkdev.h> +#include <linux/vmalloc.h> + +/* + * NVMe Direct I/O implementation for habanalabs driver + * + * ASSUMPTIONS + * =========== + * 1. No IOMMU (well, technically it can work with IOMMU, but it is *almost useless). + * 2. Only READ operations (can extend in the future). + * 3. No sparse files (can overcome this in the future). + * 4. Kernel version >= 6.9 + * 5. Requiring page alignment is OK (I don't see a solution to this one right, + * now, how do we read partial pages?) + * 6. Kernel compiled with CONFIG_PCI_P2PDMA. This requires a CUSTOM kernel. + * Theoretically I have a slight idea on how this could be solvable, but it + * is probably inacceptable for the upstream. Also may not work in the end. + * 7. Either make sure our cards and disks are under the same PCI bridge, or + * compile a custom kernel to hack around this. + */ + +#define IO_STABILIZE_TIMEOUT 10000000 /* 10 seconds in microseconds */ + +/* + * This struct contains all the useful data I could milk out of the file handle + * provided by the user. + * @TODO: right now it is retrieved on each IO, but can be done once with some + * dedicated IOCTL, call it for example HL_REGISTER_HANDLE. 
+ */ +struct hl_dio_fd { + /* Back pointer in case we need it in async completion */ + struct hl_ctx *ctx; + /* Associated fd struct */ + struct file *filp; +}; + +/* + * This is a single IO descriptor + */ +struct hl_direct_io { + struct hl_dio_fd f; + struct kiocb kio; + struct bio_vec *bv; + struct iov_iter iter; + u64 device_va; + u64 off_bytes; + u64 len_bytes; + u32 type; +}; + +bool hl_device_supports_nvme(struct hl_device *hdev) +{ + return hdev->asic_prop.supports_nvme; +} + +static int hl_dio_fd_register(struct hl_ctx *ctx, int fd, struct hl_dio_fd *f) +{ + struct hl_device *hdev = ctx->hdev; + struct block_device *bd; + struct super_block *sb; + struct inode *inode; + struct gendisk *gd; + struct device *disk_dev; + int rc; + + f->filp = fget(fd); + if (!f->filp) { + rc = -ENOENT; + goto out; + } + + if (!(f->filp->f_flags & O_DIRECT)) { + dev_err(hdev->dev, "file is not in the direct mode\n"); + rc = -EINVAL; + goto fput; + } + + if (!f->filp->f_op->read_iter) { + dev_err(hdev->dev, "read iter is not supported, need to fall back to legacy\n"); + rc = -EINVAL; + goto fput; + } + + inode = file_inode(f->filp); + sb = inode->i_sb; + bd = sb->s_bdev; + gd = bd->bd_disk; + + if (inode->i_blocks << sb->s_blocksize_bits < i_size_read(inode)) { + dev_err(hdev->dev, "sparse files are not currently supported\n"); + rc = -EINVAL; + goto fput; + } + + if (!bd || !gd) { + dev_err(hdev->dev, "invalid block device\n"); + rc = -ENODEV; + goto fput; + } + /* Get the underlying device from the block device */ + disk_dev = disk_to_dev(gd); + if (!dma_pci_p2pdma_supported(disk_dev)) { + dev_err(hdev->dev, "device does not support PCI P2P DMA\n"); + rc = -EOPNOTSUPP; + goto fput; + } + + /* + * @TODO: Maybe we need additional checks here + */ + + f->ctx = ctx; + rc = 0; + + goto out; +fput: + fput(f->filp); +out: + return rc; +} + +static void hl_dio_fd_unregister(struct hl_dio_fd *f) +{ + fput(f->filp); +} + +static long hl_dio_count_io(struct hl_device *hdev) +{ + s64 sum 
= 0; + int i; + + for_each_possible_cpu(i) + sum += per_cpu(*hdev->hldio.inflight_ios, i); + + return sum; +} + +static bool hl_dio_get_iopath(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (hdev->hldio.io_enabled) { + this_cpu_inc(*hdev->hldio.inflight_ios); + + /* Avoid race conditions */ + if (!hdev->hldio.io_enabled) { + this_cpu_dec(*hdev->hldio.inflight_ios); + return false; + } + + hl_ctx_get(ctx); + + return true; + } + + return false; +} + +static void hl_dio_put_iopath(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + hl_ctx_put(ctx); + this_cpu_dec(*hdev->hldio.inflight_ios); +} + +static void hl_dio_set_io_enabled(struct hl_device *hdev, bool enabled) +{ + hdev->hldio.io_enabled = enabled; +} + +static bool hl_dio_validate_io(struct hl_device *hdev, struct hl_direct_io *io) +{ + if ((u64)io->device_va & ~PAGE_MASK) { + dev_dbg(hdev->dev, "device address must be 4K aligned\n"); + return false; + } + + if (io->len_bytes & ~PAGE_MASK) { + dev_dbg(hdev->dev, "IO length must be 4K aligned\n"); + return false; + } + + if (io->off_bytes & ~PAGE_MASK) { + dev_dbg(hdev->dev, "IO offset must be 4K aligned\n"); + return false; + } + + return true; +} + +static struct page *hl_dio_va2page(struct hl_device *hdev, struct hl_ctx *ctx, u64 device_va) +{ + struct hl_dio *hldio = &hdev->hldio; + u64 device_pa; + int rc, i; + + rc = hl_mmu_va_to_pa(ctx, device_va, &device_pa); + if (rc) { + dev_err(hdev->dev, "device virtual address translation error: %#llx (%d)", + device_va, rc); + return NULL; + } + + for (i = 0 ; i < hldio->np2prs ; ++i) { + if (device_pa >= hldio->p2prs[i].device_pa && + device_pa < hldio->p2prs[i].device_pa + hldio->p2prs[i].size) + return hldio->p2prs[i].p2ppages[(device_pa - hldio->p2prs[i].device_pa) >> + PAGE_SHIFT]; + } + + return NULL; +} + +static ssize_t hl_direct_io(struct hl_device *hdev, struct hl_direct_io *io) +{ + u64 npages, device_va; + ssize_t rc; + int i; + + if (!hl_dio_validate_io(hdev, io)) + 
return -EINVAL; + + if (!hl_dio_get_iopath(io->f.ctx)) { + dev_info(hdev->dev, "can't schedule a new IO, IO is disabled\n"); + return -ESHUTDOWN; + } + + init_sync_kiocb(&io->kio, io->f.filp); + io->kio.ki_pos = io->off_bytes; + + npages = (io->len_bytes >> PAGE_SHIFT); + + /* @TODO: this can be implemented smarter, vmalloc in iopath is not + * ideal. Maybe some variation of genpool. Number of pages may differ + * greatly, so maybe even use pools of different sizes and chose the + * closest one. + */ + io->bv = vzalloc(npages * sizeof(struct bio_vec)); + if (!io->bv) + return -ENOMEM; + + for (i = 0, device_va = io->device_va; i < npages ; ++i, device_va += PAGE_SIZE) { + io->bv[i].bv_page = hl_dio_va2page(hdev, io->f.ctx, device_va); + if (!io->bv[i].bv_page) { + dev_err(hdev->dev, "error getting page struct for device va %#llx", + device_va); + rc = -EFAULT; + goto cleanup; + } + io->bv[i].bv_offset = 0; + io->bv[i].bv_len = PAGE_SIZE; + } + + iov_iter_bvec(&io->iter, io->type, io->bv, 1, io->len_bytes); + if (io->f.filp->f_op && io->f.filp->f_op->read_iter) + rc = io->f.filp->f_op->read_iter(&io->kio, &io->iter); + else + rc = -EINVAL; + +cleanup: + vfree(io->bv); + hl_dio_put_iopath(io->f.ctx); + + dev_dbg(hdev->dev, "IO ended with %ld\n", rc); + + return rc; +} + +/* + * @TODO: This function can be used as a callback for io completion under + * kio->ki_complete in order to implement async IO. + * Note that on more recent kernels there is no ret2. + */ +__maybe_unused static void hl_direct_io_complete(struct kiocb *kio, long ret, long ret2) +{ + struct hl_direct_io *io = container_of(kio, struct hl_direct_io, kio); + + dev_dbg(io->f.ctx->hdev->dev, "IO completed with %ld\n", ret); + + /* Do something to copy result to user / notify completion */ + + hl_dio_put_iopath(io->f.ctx); + + hl_dio_fd_unregister(&io->f); +} + +/* + * DMA disk to ASIC, wait for results. 
Must be invoked from the user context
+ */
+int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+		  u64 device_va, off_t off_bytes, size_t len_bytes,
+		  size_t *len_read)
+{
+	struct hl_direct_io *io;
+	ssize_t rc;
+
+	/* %zx is the correct printk format for size_t (was %lx, wrong on 32-bit) */
+	dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#zx\n", fd, device_va, len_bytes);
+
+	io = kzalloc(sizeof(*io), GFP_KERNEL);
+	if (!io) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	*io = (struct hl_direct_io){
+		.device_va = device_va,
+		.len_bytes = len_bytes,
+		.off_bytes = off_bytes,
+		.type = READ,
+	};
+
+	rc = hl_dio_fd_register(ctx, fd, &io->f);
+	if (rc)
+		goto kfree_io;
+
+	rc = hl_direct_io(hdev, io);
+	if (rc >= 0) {
+		*len_read = rc;
+		rc = 0;
+	}
+
+	/* This shall be called only in the case of a sync IO */
+	hl_dio_fd_unregister(&io->f);
+kfree_io:
+	kfree(io);
+out:
+	return rc;
+}
+
+static void hl_p2p_region_fini(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{
+	if (p2pr->p2ppages) {
+		vfree(p2pr->p2ppages);
+		p2pr->p2ppages = NULL;
+	}
+
+	if (p2pr->p2pmem) {
+		dev_dbg(hdev->dev, "freeing P2P mem from %p, size=%#llx\n",
+			p2pr->p2pmem, p2pr->size);
+		pci_free_p2pmem(hdev->pdev, p2pr->p2pmem, p2pr->size);
+		p2pr->p2pmem = NULL;
+	}
+}
+
+void hl_p2p_region_fini_all(struct hl_device *hdev)
+{
+	int i;
+
+	for (i = 0 ; i < hdev->hldio.np2prs ; ++i)
+		hl_p2p_region_fini(hdev, &hdev->hldio.p2prs[i]);
+
+	kvfree(hdev->hldio.p2prs);
+	hdev->hldio.p2prs = NULL;
+	hdev->hldio.np2prs = 0;
+}
+
+int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{
+	void *addr;
+	int rc, i;
+
+	/* Start by publishing our p2p memory */
+	rc = pci_p2pdma_add_resource(hdev->pdev, p2pr->bar, p2pr->size, p2pr->bar_offset);
+	if (rc) {
+		dev_err(hdev->dev, "error adding p2p resource: %d\n", rc);
+		goto err;
+	}
+
+	/* Alloc all p2p mem */
+	p2pr->p2pmem = pci_alloc_p2pmem(hdev->pdev, p2pr->size);
+	if (!p2pr->p2pmem) {
+		dev_err(hdev->dev, "error allocating p2p memory\n");
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	p2pr->p2ppages = vmalloc((p2pr->size >> PAGE_SHIFT) * sizeof(struct page *));
+	if (!p2pr->p2ppages) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	for (i = 0, addr = p2pr->p2pmem ; i < (p2pr->size >> PAGE_SHIFT) ; ++i, addr += PAGE_SIZE) {
+		p2pr->p2ppages[i] = virt_to_page(addr);
+		if (!p2pr->p2ppages[i]) {
+			rc = -EFAULT;
+			goto err;
+		}
+	}
+
+	return 0;
+err:
+	hl_p2p_region_fini(hdev, p2pr);
+	return rc;
+}
+
+int hl_dio_start(struct hl_device *hdev)
+{
+	dev_dbg(hdev->dev, "initializing HLDIO\n");
+
+	/* Initialize the IO counter and enable IO */
+	hdev->hldio.inflight_ios = alloc_percpu(s64);
+	if (!hdev->hldio.inflight_ios)
+		return -ENOMEM;
+
+	hl_dio_set_io_enabled(hdev, true);
+
+	return 0;
+}
+
+void hl_dio_stop(struct hl_device *hdev)
+{
+	dev_dbg(hdev->dev, "deinitializing HLDIO\n");
+
+	if (hdev->hldio.io_enabled) {
+		/* Wait for all the IO to finish */
+		hl_dio_set_io_enabled(hdev, false);
+		hl_poll_timeout_condition(hdev, !hl_dio_count_io(hdev), 1000, IO_STABILIZE_TIMEOUT);
+	}
+
+	if (hdev->hldio.inflight_ios) {
+		free_percpu(hdev->hldio.inflight_ios);
+		hdev->hldio.inflight_ios = NULL;
+	}
+}
diff --git a/drivers/accel/habanalabs/common/hldio.h b/drivers/accel/habanalabs/common/hldio.h
new file mode 100644
index 000000000000..2874388f2851
--- /dev/null
+++ b/drivers/accel/habanalabs/common/hldio.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * hldio.h - NVMe Direct I/O (HLDIO) infrastructure for Habana Labs Driver
+ *
+ * This feature requires specific hardware setup and must not be built
+ * under COMPILE_TEST.
+ */
+
+#ifndef __HL_HLDIO_H__
+#define __HL_HLDIO_H__
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/ktime.h>	/* ktime functions */
+#include <linux/delay.h>	/* usleep_range */
+#include <linux/kernel.h>	/* might_sleep_if */
+#include <linux/errno.h>	/* error codes */
+
+/* Forward declarations */
+struct hl_device;
+struct file;
+
+/* Enable only if Kconfig selected */
+#ifdef CONFIG_HL_HLDIO
+/**
+ * struct hl_p2p_region - describes a single P2P memory region
+ * @p2ppages: array of page structs for the P2P memory
+ * @p2pmem: virtual address of the P2P memory region
+ * @device_pa: physical address on the device
+ * @bar_offset: offset within the BAR
+ * @size: size of the region in bytes
+ * @bar: BAR number containing this region
+ */
+struct hl_p2p_region {
+	struct page **p2ppages;
+	void *p2pmem;
+	u64 device_pa;
+	u64 bar_offset;
+	u64 size;
+	int bar;
+};
+
+/**
+ * struct hl_dio_stats - Direct I/O statistics
+ * @total_ops: total number of operations attempted
+ * @successful_ops: number of successful operations
+ * @failed_ops: number of failed operations
+ * @bytes_transferred: total bytes successfully transferred
+ * @last_len_read: length of the last read operation
+ */
+struct hl_dio_stats {
+	u64 total_ops;
+	u64 successful_ops;
+	u64 failed_ops;
+	u64 bytes_transferred;
+	size_t last_len_read;
+};
+
+/**
+ * struct hl_dio - describes habanalabs direct storage interaction interface
+ * @p2prs: array of p2p regions
+ * @inflight_ios: percpu counter for inflight ios
+ * @np2prs: number of elements in p2prs
+ * @io_enabled: 1 if io is enabled 0 otherwise
+ */
+struct hl_dio {
+	struct hl_p2p_region *p2prs;
+	s64 __percpu *inflight_ios;
+	u8 np2prs;
+	u8 io_enabled;
+};
+
+int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+		  u64 device_va, off_t off_bytes, size_t len_bytes,
+		  size_t *len_read);
+void hl_p2p_region_fini_all(struct hl_device *hdev);
+int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr);
+int hl_dio_start(struct hl_device *hdev);
+void hl_dio_stop(struct hl_device *hdev);
+
+/* Init/teardown */
+int hl_hldio_init(struct hl_device *hdev);
+void hl_hldio_fini(struct hl_device *hdev);
+
+/* File operations */
+long hl_hldio_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+
+/* DebugFS hooks */
+#ifdef CONFIG_DEBUG_FS
+void hl_hldio_debugfs_init(struct hl_device *hdev);
+void hl_hldio_debugfs_fini(struct hl_device *hdev);
+#else
+static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
+static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
+#endif
+
+#else /* !CONFIG_HL_HLDIO */
+
+struct hl_p2p_region;
+/* Stubs when HLDIO is disabled */
+static inline int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+				u64 device_va, off_t off_bytes, size_t len_bytes,
+				size_t *len_read)
+{ return -EOPNOTSUPP; }
+static inline void hl_p2p_region_fini_all(struct hl_device *hdev) {}
+static inline int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{ return -EOPNOTSUPP; }
+static inline int hl_dio_start(struct hl_device *hdev) { return -EOPNOTSUPP; }
+static inline void hl_dio_stop(struct hl_device *hdev) {}
+
+static inline int hl_hldio_init(struct hl_device *hdev) { return 0; }
+static inline void hl_hldio_fini(struct hl_device *hdev) { }
+static inline long hl_hldio_ioctl(struct file *f, unsigned int c,
+				  unsigned long a)
+{ return -ENOTTY; }
+static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
+static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
+
+#endif /* CONFIG_HL_HLDIO */
+
+/* Simplified polling macro for HLDIO (no simulator support) */
+#define hl_poll_timeout_condition(hdev, cond, sleep_us, timeout_us) \
+({ \
+	ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \
+	might_sleep_if(sleep_us); \
+	(void)(hdev); /* keep signature consistent, hdev unused */ \
+	for (;;) { \
+		mb(); /* ensure ordering of memory operations */ \
+		if (cond) \
+			break; \
+		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
+			break; \
+		if (sleep_us) \
+			usleep_range((sleep_us >> 2) + 1, sleep_us); \
+	} \
+	(cond) ? 0 : -ETIMEDOUT; \
+})
+
+#ifdef CONFIG_HL_HLDIO
+bool hl_device_supports_nvme(struct hl_device *hdev);
+#else
+static inline bool hl_device_supports_nvme(struct hl_device *hdev) { return false; }
+#endif
+
+#endif /* __HL_HLDIO_H__ */
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 61472a381904..633db4bff46f 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1837,7 +1837,12 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
 	atomic_dec(&ctx->hdev->dmabuf_export_cnt);
 	hl_ctx_put(ctx);
 
-	/* Paired with get_file() in export_dmabuf() */
+	/*
+	 * Paired with get_file() in export_dmabuf().
+	 * 'ctx' can still be used here to get the file pointer, even after hl_ctx_put() was called,
+	 * because releasing the compute device file involves another reference decrement, and it
+	 * would be possible only after calling fput().
+	 */
 	fput(ctx->hpriv->file_priv->filp);
 
 	kfree(hl_dmabuf);
@@ -2332,7 +2337,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
 		if (rc < 0)
 			goto destroy_pages;
 		npages = rc;
-		rc = -EFAULT;
+		rc = -ENOMEM;
 		goto put_pages;
 	}
 	userptr->npages = npages;
diff --git a/drivers/accel/habanalabs/common/memory_mgr.c b/drivers/accel/habanalabs/common/memory_mgr.c
index 99cd83139d46..4401beb99e42 100644
--- a/drivers/accel/habanalabs/common/memory_mgr.c
+++ b/drivers/accel/habanalabs/common/memory_mgr.c
@@ -259,13 +259,8 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
 		goto put_mem;
 	}
 
-#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
-	if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
-			user_mem_size)) {
-#else
 	if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
 			user_mem_size)) {
-#endif
 		dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n",
 			buf->behavior->topic, vma->vm_start);
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 82f66520ec18..8f55ba3b4e73 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -96,14 +96,21 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
 	infineon_second_stage_third_instance =
 		(infineon_second_stage_version >> 16) & mask;
 
-	if (cpucp_info->infineon_second_stage_version)
+	if (cpucp_info->infineon_version && cpucp_info->infineon_second_stage_version)
 		return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
 			le32_to_cpu(cpucp_info->infineon_version),
 			infineon_second_stage_first_instance,
 			infineon_second_stage_second_instance,
 			infineon_second_stage_third_instance);
-	else
+	else if (cpucp_info->infineon_second_stage_version)
+		return sprintf(buf, "%#04x:%#04x:%#04x\n",
+			infineon_second_stage_first_instance,
+			infineon_second_stage_second_instance,
+			infineon_second_stage_third_instance);
+	else if (cpucp_info->infineon_version)
 		return sprintf(buf, "%#04x\n",
 			le32_to_cpu(cpucp_info->infineon_version));
+
+	return 0;
 }
 
 static DEVICE_ATTR_RO(vrm_ver);