diff options
Diffstat (limited to 'drivers/accel/habanalabs/common')
-rw-r--r-- | drivers/accel/habanalabs/common/Makefile | 5 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/debugfs.c | 324 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/device.c | 23 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/habanalabs.h | 56 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/habanalabs_ioctl.c | 6 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/hldio.c | 437 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/hldio.h | 146 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/memory.c | 9 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/memory_mgr.c | 5 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/sysfs.c | 11 |
10 files changed, 1010 insertions, 12 deletions
diff --git a/drivers/accel/habanalabs/common/Makefile b/drivers/accel/habanalabs/common/Makefile index e6abffea9f87..b6d00de09db5 100644 --- a/drivers/accel/habanalabs/common/Makefile +++ b/drivers/accel/habanalabs/common/Makefile @@ -13,3 +13,8 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/command_submission.o common/firmware_if.o \ common/security.o common/state_dump.o \ common/memory_mgr.o common/decoder.o + +# Conditionally add HLDIO support +ifdef CONFIG_HL_HLDIO +HL_COMMON_FILES += common/hldio.o +endif
\ No newline at end of file diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index 4b391807e5f2..5f0820b19ccb 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -6,6 +6,7 @@ */ #include "habanalabs.h" +#include "hldio.h" #include "../include/hw_ip/mmu/mmu_general.h" #include <linux/pci.h> @@ -602,6 +603,198 @@ static int engines_show(struct seq_file *s, void *data) return 0; } +#ifdef CONFIG_HL_HLDIO +/* DIO debugfs functions following the standard pattern */ +static int dio_ssd2hl_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (!hdev->asic_prop.supports_nvme) { + seq_puts(s, "NVMe Direct I/O not supported\\n"); + return 0; + } + + seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n"); + seq_printf(s, "Last transfer: %zu bytes\\n", dev_entry->dio_stats.last_len_read); + seq_puts(s, "Note: All parameters must be page-aligned (4KB)\\n"); + + return 0; +} + +static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct hl_ctx *ctx = hdev->kernel_ctx; + char kbuf[128]; + u64 device_va = 0, off_bytes = 0, len_bytes = 0; + u32 fd = 0; + size_t len_read = 0; + int rc, parsed; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + if (count >= sizeof(kbuf)) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count] = 0; + + /* Parse: fd=N va=0xADDR off=N len=N */ + parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu", + &fd, &device_va, &off_bytes, &len_bytes); + if (parsed != 4) { + dev_err(hdev->dev, 
"Invalid format. Expected: fd=N va=0xADDR off=N len=N\\n"); + return -EINVAL; + } + + /* Validate file descriptor */ + if (fd == 0) { + dev_err(hdev->dev, "Invalid file descriptor: %u\\n", fd); + return -EINVAL; + } + + /* Validate alignment requirements */ + if (!IS_ALIGNED(device_va, PAGE_SIZE) || + !IS_ALIGNED(off_bytes, PAGE_SIZE) || + !IS_ALIGNED(len_bytes, PAGE_SIZE)) { + dev_err(hdev->dev, + "All parameters must be page-aligned (4KB)\\n"); + return -EINVAL; + } + + /* Validate transfer size */ + if (len_bytes == 0 || len_bytes > SZ_1G) { + dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\\n", + len_bytes); + return -EINVAL; + } + + dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\\n", + fd, device_va, off_bytes, len_bytes); + + rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read); + if (rc < 0) { + dev_entry->dio_stats.failed_ops++; + dev_err(hdev->dev, "SSD2HL operation failed: %d\\n", rc); + return rc; + } + + /* Update statistics */ + dev_entry->dio_stats.total_ops++; + dev_entry->dio_stats.successful_ops++; + dev_entry->dio_stats.bytes_transferred += len_read; + dev_entry->dio_stats.last_len_read = len_read; + + dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\\n", len_read); + + return count; +} + +static int dio_hl2ssd_show(struct seq_file *s, void *data) +{ + seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\\n"); + return 0; +} + +static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + dev_dbg(hdev->dev, "HL2SSD operation not implemented\\n"); + return -EOPNOTSUPP; +} + +static int dio_stats_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = 
s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct hl_dio_stats *stats = &dev_entry->dio_stats; + u64 avg_bytes_per_op = 0, success_rate = 0; + + if (!hdev->asic_prop.supports_nvme) { + seq_puts(s, "NVMe Direct I/O not supported\\n"); + return 0; + } + + if (stats->successful_ops > 0) + avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops; + + if (stats->total_ops > 0) + success_rate = (stats->successful_ops * 100) / stats->total_ops; + + seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\\n"); + seq_printf(s, "Total operations: %llu\\n", stats->total_ops); + seq_printf(s, "Successful ops: %llu\\n", stats->successful_ops); + seq_printf(s, "Failed ops: %llu\\n", stats->failed_ops); + seq_printf(s, "Success rate: %llu%%\\n", success_rate); + seq_printf(s, "Total bytes: %llu\\n", stats->bytes_transferred); + seq_printf(s, "Avg bytes per op: %llu\\n", avg_bytes_per_op); + seq_printf(s, "Last transfer: %zu bytes\\n", stats->last_len_read); + + return 0; +} + +static int dio_reset_show(struct seq_file *s, void *data) +{ + seq_puts(s, "Write '1' to reset DIO statistics\\n"); + return 0; +} + +static ssize_t dio_reset_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + char kbuf[8]; + unsigned long val; + int rc; + + if (!hdev->asic_prop.supports_nvme) + return -EOPNOTSUPP; + + if (count >= sizeof(kbuf)) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count] = 0; + + rc = kstrtoul(kbuf, 0, &val); + if (rc) + return rc; + + if (val == 1) { + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); + dev_dbg(hdev->dev, "DIO statistics reset\\n"); + } else { + dev_err(hdev->dev, "Write '1' to reset statistics\\n"); + 
return -EINVAL; + } + + return count; +} +#endif + static ssize_t hl_memory_scrub(struct file *f, const char __user *buf, size_t count, loff_t *ppos) { @@ -788,6 +981,113 @@ static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val, } } +static void dump_cfg_access_entry(struct hl_device *hdev, + struct hl_debugfs_cfg_access_entry *entry) +{ + char *access_type = ""; + struct tm tm; + + switch (entry->debugfs_type) { + case DEBUGFS_READ32: + access_type = "READ32 from"; + break; + case DEBUGFS_WRITE32: + access_type = "WRITE32 to"; + break; + case DEBUGFS_READ64: + access_type = "READ64 from"; + break; + case DEBUGFS_WRITE64: + access_type = "WRITE64 to"; + break; + default: + dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type); + return; + } + + time64_to_tm(entry->seconds_since_epoch, 0, &tm); + dev_info(hdev->dev, + "%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1, + tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr); +} + +void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) +{ + struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses; + u32 i, head, count = 0; + time64_t entry_time, now; + unsigned long flags; + + now = ktime_get_real_seconds(); + + spin_lock_irqsave(&dbgfs->lock, flags); + head = dbgfs->head; + if (head == 0) + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; + else + i = head - 1; + + /* Walk back until timeout or invalid entry */ + while (dbgfs->cfg_access_list[i].valid) { + entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch; + /* Stop when entry is older than timeout */ + if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC) + break; + + /* print single entry under lock */ + { + struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i]; + /* + * We copy the entry out under lock and then print after + * releasing the lock to minimize time under lock. 
+ */ + spin_unlock_irqrestore(&dbgfs->lock, flags); + dump_cfg_access_entry(hdev, &entry); + spin_lock_irqsave(&dbgfs->lock, flags); + } + + /* mark consumed */ + dbgfs->cfg_access_list[i].valid = false; + + if (i == 0) + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; + else + i--; + count++; + if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN) + break; + } + spin_unlock_irqrestore(&dbgfs->lock, flags); +} + +static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size, + enum debugfs_access_type access_type) +{ + struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses; + struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG]; + struct hl_debugfs_cfg_access_entry *new_entry; + unsigned long flags; + + /* Check if address is in config memory */ + if (addr >= mem_reg->region_base && + mem_reg->region_size >= access_size && + addr <= mem_reg->region_base + mem_reg->region_size - access_size) { + + spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags); + + new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head]; + new_entry->seconds_since_epoch = ktime_get_real_seconds(); + new_entry->addr = addr; + new_entry->debugfs_type = access_type; + new_entry->valid = true; + dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1) + % HL_DBGFS_CFG_ACCESS_HIST_LEN; + + spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags); + + } +} + static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type) { @@ -805,6 +1105,7 @@ static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, return rc; } + check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type); rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found); if (rc) { dev_err(hdev->dev, @@ -1525,6 +1826,13 @@ static const struct hl_info_list hl_debugfs_list[] = { {"mmu", mmu_show, mmu_asid_va_write}, {"mmu_error", mmu_ack_error, mmu_ack_error_value_write}, {"engines", engines_show, NULL}, 
+#ifdef CONFIG_HL_HLDIO + /* DIO entries - only created if NVMe is supported */ + {"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write}, + {"dio_stats", dio_stats_show, NULL}, + {"dio_reset", dio_reset_show, dio_reset_write}, + {"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write}, +#endif }; static int hl_debugfs_open(struct inode *inode, struct file *file) @@ -1723,6 +2031,11 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent &hdev->asic_prop.server_type); for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { + /* Skip DIO entries if NVMe is not supported */ + if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 && + !hdev->asic_prop.supports_nvme) + continue; + debugfs_create_file(hl_debugfs_list[i].name, 0644, root, @@ -1762,6 +2075,14 @@ int hl_debugfs_device_init(struct hl_device *hdev) spin_lock_init(&dev_entry->userptr_spinlock); mutex_init(&dev_entry->ctx_mem_hash_mutex); + spin_lock_init(&hdev->debugfs_cfg_accesses.lock); + hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */ + +#ifdef CONFIG_HL_HLDIO + /* Initialize DIO statistics */ + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); +#endif + return 0; } @@ -1780,6 +2101,7 @@ void hl_debugfs_device_fini(struct hl_device *hdev) vfree(entry->state_dump[i]); kfree(entry->entry_arr); + } void hl_debugfs_add_device(struct hl_device *hdev) @@ -1792,6 +2114,7 @@ void hl_debugfs_add_device(struct hl_device *hdev) if (!hdev->asic_prop.fw_security_enabled) add_secured_nodes(dev_entry, dev_entry->root); + } void hl_debugfs_add_file(struct hl_fpriv *hpriv) @@ -1924,3 +2247,4 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, up_write(&dev_entry->state_dump_sem); } + diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 80fa08bf57bd..999c92d7036e 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -1630,6 
+1630,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release; + if (hdev->cpld_shutdown) { + dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! Device is NOT usable\n"); + return -EIO; + } + if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); return 0; @@ -2576,6 +2581,14 @@ void hl_device_fini(struct hl_device *hdev) if (rc) dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); + /* Reset the H/W (if it accessible). It will be in idle state after this returns */ + if (!hdev->cpld_shutdown) { + rc = hdev->asic_funcs->hw_fini(hdev, true, false); + if (rc) + dev_err(hdev->dev, + "hw_fini failed in device fini while removing device %d\n", rc); + } + hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; /* Release kernel context */ @@ -2943,3 +2956,13 @@ void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *eve mutex_unlock(&clk_throttle->lock); } + +void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask) +{ + hl_handle_critical_hw_err(hdev, event_id, event_mask); + *event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; + + /* Avoid any new accesses to the H/W */ + hdev->disabled = true; + hdev->cpld_shutdown = true; +} diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 6f27ce4fa01b..d94c2ba22a6a 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -90,7 +90,9 @@ struct hl_fpriv; #define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF #define HL_COMMON_DEC_INTERRUPT_ID 0xFFE -#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_STATE_DUMP_HIST_LEN 5 +#define HL_DBGFS_CFG_ACCESS_HIST_LEN 20 +#define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC 
2 /* 2s */ /* Default value for device reset trigger , an invalid value */ #define HL_RESET_TRIGGER_DEFAULT 0xFF @@ -702,6 +704,7 @@ struct hl_hints_range { * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. * @supports_engine_modes: true if changing engines/engine_cores modes is supported. * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw. + * @supports_nvme: indicates whether the asic supports NVMe P2P DMA. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -822,6 +825,7 @@ struct asic_fixed_properties { u8 supports_advanced_cpucp_rc; u8 supports_engine_modes; u8 support_dynamic_resereved_fw_size; + u8 supports_nvme; }; /** @@ -2274,6 +2278,9 @@ struct hl_vm { u8 init_done; }; +#ifdef CONFIG_HL_HLDIO +#include "hldio.h" +#endif /* * DEBUG, PROFILING STRUCTURE @@ -2344,7 +2351,6 @@ struct hl_fpriv { struct mutex ctx_lock; }; - /* * DebugFS */ @@ -2372,6 +2378,7 @@ struct hl_debugfs_entry { struct hl_dbg_device_entry *dev_entry; }; + /** * struct hl_dbg_device_entry - ASIC specific debugfs manager. * @root: root dentry. @@ -2403,6 +2410,7 @@ struct hl_debugfs_entry { * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. * @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read. + * @dio_stats: Direct I/O statistics */ struct hl_dbg_device_entry { struct dentry *root; @@ -2434,6 +2442,35 @@ struct hl_dbg_device_entry { u8 i2c_addr; u8 i2c_reg; u8 i2c_len; +#ifdef CONFIG_HL_HLDIO + struct hl_dio_stats dio_stats; +#endif +}; + +/** + * struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of + * hl_debugfs_cfg_access. + * @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons. + * @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64. 
+ * @addr: the requested address to access. + * @valid: if set, this entry has valid data for dumping at interrupt time. + */ +struct hl_debugfs_cfg_access_entry { + ktime_t seconds_since_epoch; + enum debugfs_access_type debugfs_type; + u64 addr; + bool valid; +}; + +/** + * struct hl_debugfs_cfg_access - saves debugfs config region access requests history. + * @cfg_access_list: list of objects describing config region access requests. + * @head: next valid index to add new entry to in cfg_access_list. + */ +struct hl_debugfs_cfg_access { + struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN]; + u32 head; + spinlock_t lock; /* protects head and entries */ }; /** @@ -3281,6 +3318,7 @@ struct eq_heartbeat_debug_info { * @hl_chip_info: ASIC's sensors information. * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. + * @debugfs_cfg_accesses: list of last debugfs config region accesses. * @cb_pool: list of pre allocated CBs. * @cb_pool_lock: protects the CB pool. * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. @@ -3305,6 +3343,7 @@ struct eq_heartbeat_debug_info { * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. * @heartbeat_debug_info: counters used to debug heartbeat failures. + * @hldio: describes habanalabs direct storage interaction interface. * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling. * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @fw_inner_major_ver: the major of current loaded preboot inner version. @@ -3357,6 +3396,7 @@ struct eq_heartbeat_debug_info { * addresses. * @is_in_dram_scrub: true if dram scrub operation is on going. * @disabled: is device disabled. + * @cpld_shutdown: is cpld shutdown. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. 
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false @@ -3461,6 +3501,7 @@ struct hl_device { struct hwmon_chip_info *hl_chip_info; struct hl_dbg_device_entry hl_debugfs; + struct hl_debugfs_cfg_access debugfs_cfg_accesses; struct list_head cb_pool; spinlock_t cb_pool_lock; @@ -3496,7 +3537,9 @@ struct hl_device { struct hl_reset_info reset_info; struct eq_heartbeat_debug_info heartbeat_debug_info; - +#ifdef CONFIG_HL_HLDIO + struct hl_dio hldio; +#endif cpumask_t irq_affinity_mask; u32 *stream_master_qid_arr; @@ -3532,6 +3575,7 @@ struct hl_device { u16 cpu_pci_msb_addr; u8 is_in_dram_scrub; u8 disabled; + u8 cpld_shutdown; u8 late_init_done; u8 hwmon_initialized; u8 reset_on_lockup; @@ -4089,6 +4133,7 @@ void hl_init_cpu_for_irq(struct hl_device *hdev); void hl_set_irq_affinity(struct hl_device *hdev, int irq); void hl_eq_heartbeat_event_handle(struct hl_device *hdev); void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask); +void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask); #ifdef CONFIG_DEBUG_FS @@ -4110,6 +4155,7 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, unsigned long length); +void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev); #else @@ -4185,6 +4231,10 @@ static inline void hl_debugfs_set_state_dump(struct hl_device *hdev, { } +static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) +{ +} + #endif /* Security */ diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index dc80ca921d90..fdfdabc85e54 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -961,6 +961,12 @@ static int send_fw_generic_request(struct 
hl_device *hdev, struct hl_info_args * case HL_PASSTHROUGH_VERSIONS: need_input_buff = false; break; + case HL_GET_ERR_COUNTERS_CMD: + need_input_buff = true; + break; + case HL_GET_P_STATE: + need_input_buff = false; + break; default: return -EINVAL; } diff --git a/drivers/accel/habanalabs/common/hldio.c b/drivers/accel/habanalabs/common/hldio.c new file mode 100644 index 000000000000..083ae5610875 --- /dev/null +++ b/drivers/accel/habanalabs/common/hldio.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2024 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "hldio.h" +#include <generated/uapi/linux/version.h> +#include <linux/pci-p2pdma.h> +#include <linux/blkdev.h> +#include <linux/vmalloc.h> + +/* + * NVMe Direct I/O implementation for habanalabs driver + * + * ASSUMPTIONS + * =========== + * 1. No IOMMU (well, technically it can work with IOMMU, but it is *almost useless). + * 2. Only READ operations (can extend in the future). + * 3. No sparse files (can overcome this in the future). + * 4. Kernel version >= 6.9 + * 5. Requiring page alignment is OK (I don't see a solution to this one right, + * now, how do we read partial pages?) + * 6. Kernel compiled with CONFIG_PCI_P2PDMA. This requires a CUSTOM kernel. + * Theoretically I have a slight idea on how this could be solvable, but it + * is probably inacceptable for the upstream. Also may not work in the end. + * 7. Either make sure our cards and disks are under the same PCI bridge, or + * compile a custom kernel to hack around this. + */ + +#define IO_STABILIZE_TIMEOUT 10000000 /* 10 seconds in microseconds */ + +/* + * This struct contains all the useful data I could milk out of the file handle + * provided by the user. + * @TODO: right now it is retrieved on each IO, but can be done once with some + * dedicated IOCTL, call it for example HL_REGISTER_HANDLE. 
+ */ +struct hl_dio_fd { + /* Back pointer in case we need it in async completion */ + struct hl_ctx *ctx; + /* Associated fd struct */ + struct file *filp; +}; + +/* + * This is a single IO descriptor + */ +struct hl_direct_io { + struct hl_dio_fd f; + struct kiocb kio; + struct bio_vec *bv; + struct iov_iter iter; + u64 device_va; + u64 off_bytes; + u64 len_bytes; + u32 type; +}; + +bool hl_device_supports_nvme(struct hl_device *hdev) +{ + return hdev->asic_prop.supports_nvme; +} + +static int hl_dio_fd_register(struct hl_ctx *ctx, int fd, struct hl_dio_fd *f) +{ + struct hl_device *hdev = ctx->hdev; + struct block_device *bd; + struct super_block *sb; + struct inode *inode; + struct gendisk *gd; + struct device *disk_dev; + int rc; + + f->filp = fget(fd); + if (!f->filp) { + rc = -ENOENT; + goto out; + } + + if (!(f->filp->f_flags & O_DIRECT)) { + dev_err(hdev->dev, "file is not in the direct mode\n"); + rc = -EINVAL; + goto fput; + } + + if (!f->filp->f_op->read_iter) { + dev_err(hdev->dev, "read iter is not supported, need to fall back to legacy\n"); + rc = -EINVAL; + goto fput; + } + + inode = file_inode(f->filp); + sb = inode->i_sb; + bd = sb->s_bdev; + gd = bd->bd_disk; + + if (inode->i_blocks << sb->s_blocksize_bits < i_size_read(inode)) { + dev_err(hdev->dev, "sparse files are not currently supported\n"); + rc = -EINVAL; + goto fput; + } + + if (!bd || !gd) { + dev_err(hdev->dev, "invalid block device\n"); + rc = -ENODEV; + goto fput; + } + /* Get the underlying device from the block device */ + disk_dev = disk_to_dev(gd); + if (!dma_pci_p2pdma_supported(disk_dev)) { + dev_err(hdev->dev, "device does not support PCI P2P DMA\n"); + rc = -EOPNOTSUPP; + goto fput; + } + + /* + * @TODO: Maybe we need additional checks here + */ + + f->ctx = ctx; + rc = 0; + + goto out; +fput: + fput(f->filp); +out: + return rc; +} + +static void hl_dio_fd_unregister(struct hl_dio_fd *f) +{ + fput(f->filp); +} + +static long hl_dio_count_io(struct hl_device *hdev) +{ + s64 sum 
= 0; + int i; + + for_each_possible_cpu(i) + sum += per_cpu(*hdev->hldio.inflight_ios, i); + + return sum; +} + +static bool hl_dio_get_iopath(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (hdev->hldio.io_enabled) { + this_cpu_inc(*hdev->hldio.inflight_ios); + + /* Avoid race conditions */ + if (!hdev->hldio.io_enabled) { + this_cpu_dec(*hdev->hldio.inflight_ios); + return false; + } + + hl_ctx_get(ctx); + + return true; + } + + return false; +} + +static void hl_dio_put_iopath(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + hl_ctx_put(ctx); + this_cpu_dec(*hdev->hldio.inflight_ios); +} + +static void hl_dio_set_io_enabled(struct hl_device *hdev, bool enabled) +{ + hdev->hldio.io_enabled = enabled; +} + +static bool hl_dio_validate_io(struct hl_device *hdev, struct hl_direct_io *io) +{ + if ((u64)io->device_va & ~PAGE_MASK) { + dev_dbg(hdev->dev, "device address must be 4K aligned\n"); + return false; + } + + if (io->len_bytes & ~PAGE_MASK) { + dev_dbg(hdev->dev, "IO length must be 4K aligned\n"); + return false; + } + + if (io->off_bytes & ~PAGE_MASK) { + dev_dbg(hdev->dev, "IO offset must be 4K aligned\n"); + return false; + } + + return true; +} + +static struct page *hl_dio_va2page(struct hl_device *hdev, struct hl_ctx *ctx, u64 device_va) +{ + struct hl_dio *hldio = &hdev->hldio; + u64 device_pa; + int rc, i; + + rc = hl_mmu_va_to_pa(ctx, device_va, &device_pa); + if (rc) { + dev_err(hdev->dev, "device virtual address translation error: %#llx (%d)", + device_va, rc); + return NULL; + } + + for (i = 0 ; i < hldio->np2prs ; ++i) { + if (device_pa >= hldio->p2prs[i].device_pa && + device_pa < hldio->p2prs[i].device_pa + hldio->p2prs[i].size) + return hldio->p2prs[i].p2ppages[(device_pa - hldio->p2prs[i].device_pa) >> + PAGE_SHIFT]; + } + + return NULL; +} + +static ssize_t hl_direct_io(struct hl_device *hdev, struct hl_direct_io *io) +{ + u64 npages, device_va; + ssize_t rc; + int i; + + if (!hl_dio_validate_io(hdev, io)) + 
return -EINVAL; + + if (!hl_dio_get_iopath(io->f.ctx)) { + dev_info(hdev->dev, "can't schedule a new IO, IO is disabled\n"); + return -ESHUTDOWN; + } + + init_sync_kiocb(&io->kio, io->f.filp); + io->kio.ki_pos = io->off_bytes; + + npages = (io->len_bytes >> PAGE_SHIFT); + + /* @TODO: this can be implemented smarter, vmalloc in iopath is not + * ideal. Maybe some variation of genpool. Number of pages may differ + * greatly, so maybe even use pools of different sizes and chose the + * closest one. + */ + io->bv = vzalloc(npages * sizeof(struct bio_vec)); + if (!io->bv) + return -ENOMEM; + + for (i = 0, device_va = io->device_va; i < npages ; ++i, device_va += PAGE_SIZE) { + io->bv[i].bv_page = hl_dio_va2page(hdev, io->f.ctx, device_va); + if (!io->bv[i].bv_page) { + dev_err(hdev->dev, "error getting page struct for device va %#llx", + device_va); + rc = -EFAULT; + goto cleanup; + } + io->bv[i].bv_offset = 0; + io->bv[i].bv_len = PAGE_SIZE; + } + + iov_iter_bvec(&io->iter, io->type, io->bv, 1, io->len_bytes); + if (io->f.filp->f_op && io->f.filp->f_op->read_iter) + rc = io->f.filp->f_op->read_iter(&io->kio, &io->iter); + else + rc = -EINVAL; + +cleanup: + vfree(io->bv); + hl_dio_put_iopath(io->f.ctx); + + dev_dbg(hdev->dev, "IO ended with %ld\n", rc); + + return rc; +} + +/* + * @TODO: This function can be used as a callback for io completion under + * kio->ki_complete in order to implement async IO. + * Note that on more recent kernels there is no ret2. + */ +__maybe_unused static void hl_direct_io_complete(struct kiocb *kio, long ret, long ret2) +{ + struct hl_direct_io *io = container_of(kio, struct hl_direct_io, kio); + + dev_dbg(io->f.ctx->hdev->dev, "IO completed with %ld\n", ret); + + /* Do something to copy result to user / notify completion */ + + hl_dio_put_iopath(io->f.ctx); + + hl_dio_fd_unregister(&io->f); +} + +/* + * DMA disk to ASIC, wait for results. 
Must be invoked from the user context
+ */
+int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+		  u64 device_va, off_t off_bytes, size_t len_bytes,
+		  size_t *len_read)
+{
+	struct hl_direct_io *io;
+	ssize_t rc;
+
+	/* %zx is the correct printk format for size_t (was %lx, wrong on 32-bit) */
+	dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#zx\n", fd, device_va, len_bytes);
+
+	io = kzalloc(sizeof(*io), GFP_KERNEL);
+	if (!io) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	*io = (struct hl_direct_io){
+		.device_va = device_va,
+		.len_bytes = len_bytes,
+		.off_bytes = off_bytes,
+		.type = READ,
+	};
+
+	rc = hl_dio_fd_register(ctx, fd, &io->f);
+	if (rc)
+		goto kfree_io;
+
+	rc = hl_direct_io(hdev, io);
+	if (rc >= 0) {
+		*len_read = rc;
+		rc = 0;
+	}
+
+	/* This shall be called only in the case of a sync IO */
+	hl_dio_fd_unregister(&io->f);
+kfree_io:
+	kfree(io);
+out:
+	return rc;
+}
+
+static void hl_p2p_region_fini(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{
+	if (p2pr->p2ppages) {
+		vfree(p2pr->p2ppages);
+		p2pr->p2ppages = NULL;
+	}
+
+	if (p2pr->p2pmem) {
+		dev_dbg(hdev->dev, "freeing P2P mem from %p, size=%#llx\n",
+			p2pr->p2pmem, p2pr->size);
+		pci_free_p2pmem(hdev->pdev, p2pr->p2pmem, p2pr->size);
+		p2pr->p2pmem = NULL;
+	}
+}
+
+void hl_p2p_region_fini_all(struct hl_device *hdev)
+{
+	int i;
+
+	for (i = 0 ; i < hdev->hldio.np2prs ; ++i)
+		hl_p2p_region_fini(hdev, &hdev->hldio.p2prs[i]);
+
+	kvfree(hdev->hldio.p2prs);
+	hdev->hldio.p2prs = NULL;
+	hdev->hldio.np2prs = 0;
+}
+
+int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{
+	void *addr;
+	int rc, i;
+
+	/* Start by publishing our p2p memory */
+	rc = pci_p2pdma_add_resource(hdev->pdev, p2pr->bar, p2pr->size, p2pr->bar_offset);
+	if (rc) {
+		dev_err(hdev->dev, "error adding p2p resource: %d\n", rc);
+		goto err;
+	}
+
+	/* Alloc all p2p mem */
+	p2pr->p2pmem = pci_alloc_p2pmem(hdev->pdev, p2pr->size);
+	if (!p2pr->p2pmem) {
+		dev_err(hdev->dev, "error allocating p2p memory\n");
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	p2pr->p2ppages = vmalloc((p2pr->size >> PAGE_SHIFT) * sizeof(struct page *));
+	if (!p2pr->p2ppages) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	for (i = 0, addr = p2pr->p2pmem ; i < (p2pr->size >> PAGE_SHIFT) ; ++i, addr += PAGE_SIZE) {
+		p2pr->p2ppages[i] = virt_to_page(addr);
+		if (!p2pr->p2ppages[i]) {
+			rc = -EFAULT;
+			goto err;
+		}
+	}
+
+	return 0;
+err:
+	hl_p2p_region_fini(hdev, p2pr);
+	return rc;
+}
+
+int hl_dio_start(struct hl_device *hdev)
+{
+	dev_dbg(hdev->dev, "initializing HLDIO\n");
+
+	/* Initialize the IO counter and enable IO */
+	hdev->hldio.inflight_ios = alloc_percpu(s64);
+	if (!hdev->hldio.inflight_ios)
+		return -ENOMEM;
+
+	hl_dio_set_io_enabled(hdev, true);
+
+	return 0;
+}
+
+void hl_dio_stop(struct hl_device *hdev)
+{
+	dev_dbg(hdev->dev, "deinitializing HLDIO\n");
+
+	if (hdev->hldio.io_enabled) {
+		/* Wait for all the IO to finish */
+		hl_dio_set_io_enabled(hdev, false);
+		hl_poll_timeout_condition(hdev, !hl_dio_count_io(hdev), 1000, IO_STABILIZE_TIMEOUT);
+	}
+
+	if (hdev->hldio.inflight_ios) {
+		free_percpu(hdev->hldio.inflight_ios);
+		hdev->hldio.inflight_ios = NULL;
+	}
+}
diff --git a/drivers/accel/habanalabs/common/hldio.h b/drivers/accel/habanalabs/common/hldio.h
new file mode 100644
index 000000000000..2874388f2851
--- /dev/null
+++ b/drivers/accel/habanalabs/common/hldio.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * hldio.h - NVMe Direct I/O (HLDIO) infrastructure for Habana Labs Driver
+ *
+ * This feature requires specific hardware setup and must not be built
+ * under COMPILE_TEST.
+ */
+
+#ifndef __HL_HLDIO_H__
+#define __HL_HLDIO_H__
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/ktime.h>	/* ktime functions */
+#include <linux/delay.h>	/* usleep_range */
+#include <linux/kernel.h>	/* might_sleep_if */
+#include <linux/errno.h>	/* error codes */
+
+/* Forward declarations */
+struct hl_device;
+struct file;
+
+/* Enable only if Kconfig selected */
+#ifdef CONFIG_HL_HLDIO
+/**
+ * struct hl_p2p_region - describes a single P2P memory region
+ * @p2ppages: array of page structs for the P2P memory
+ * @p2pmem: virtual address of the P2P memory region
+ * @device_pa: physical address on the device
+ * @bar_offset: offset within the BAR
+ * @size: size of the region in bytes
+ * @bar: BAR number containing this region
+ */
+struct hl_p2p_region {
+	struct page **p2ppages;
+	void *p2pmem;
+	u64 device_pa;
+	u64 bar_offset;
+	u64 size;
+	int bar;
+};
+
+/**
+ * struct hl_dio_stats - Direct I/O statistics
+ * @total_ops: total number of operations attempted
+ * @successful_ops: number of successful operations
+ * @failed_ops: number of failed operations
+ * @bytes_transferred: total bytes successfully transferred
+ * @last_len_read: length of the last read operation
+ */
+struct hl_dio_stats {
+	u64 total_ops;
+	u64 successful_ops;
+	u64 failed_ops;
+	u64 bytes_transferred;
+	size_t last_len_read;
+};
+
+/**
+ * struct hl_dio - describes habanalabs direct storage interaction interface
+ * @p2prs: array of p2p regions
+ * @inflight_ios: percpu counter for inflight ios
+ * @np2prs: number of elements in p2prs
+ * @io_enabled: 1 if io is enabled 0 otherwise
+ */
+struct hl_dio {
+	struct hl_p2p_region *p2prs;
+	s64 __percpu *inflight_ios;
+	u8 np2prs;
+	u8 io_enabled;
+};
+
+int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+		  u64 device_va, off_t off_bytes, size_t len_bytes,
+		  size_t *len_read);
+void hl_p2p_region_fini_all(struct hl_device *hdev);
+int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr);
+int hl_dio_start(struct hl_device *hdev);
+void hl_dio_stop(struct hl_device *hdev);
+
+/* Init/teardown */
+int hl_hldio_init(struct hl_device *hdev);
+void hl_hldio_fini(struct hl_device *hdev);
+
+/* File operations */
+long hl_hldio_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+
+/* DebugFS hooks */
+#ifdef CONFIG_DEBUG_FS
+void hl_hldio_debugfs_init(struct hl_device *hdev);
+void hl_hldio_debugfs_fini(struct hl_device *hdev);
+#else
+static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
+static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
+#endif
+
+#else /* !CONFIG_HL_HLDIO */
+
+struct hl_p2p_region;
+/* Stubs when HLDIO is disabled */
+static inline int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
+				u64 device_va, off_t off_bytes, size_t len_bytes,
+				size_t *len_read)
+{ return -EOPNOTSUPP; }
+static inline void hl_p2p_region_fini_all(struct hl_device *hdev) {}
+static inline int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
+{ return -EOPNOTSUPP; }
+static inline int hl_dio_start(struct hl_device *hdev) { return -EOPNOTSUPP; }
+static inline void hl_dio_stop(struct hl_device *hdev) {}
+
+static inline int hl_hldio_init(struct hl_device *hdev) { return 0; }
+static inline void hl_hldio_fini(struct hl_device *hdev) { }
+static inline long hl_hldio_ioctl(struct file *f, unsigned int c,
+				  unsigned long a)
+{ return -ENOTTY; }
+static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
+static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
+
+#endif /* CONFIG_HL_HLDIO */
+
+/* Simplified polling macro for HLDIO (no simulator support) */
+#define hl_poll_timeout_condition(hdev, cond, sleep_us, timeout_us) \
+({ \
+	ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \
+	might_sleep_if(sleep_us); \
+	(void)(hdev); /* keep signature consistent, hdev unused */ \
+	for (;;) { \
+		mb(); /* ensure ordering of memory operations */ \
+		if (cond) \
+			break; \
+		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
+			break; \
+		if (sleep_us) \
+			usleep_range((sleep_us >> 2) + 1, sleep_us); \
+	} \
+	(cond) ? 0 : -ETIMEDOUT; \
+})
+
+#ifdef CONFIG_HL_HLDIO
+bool hl_device_supports_nvme(struct hl_device *hdev);
+#else
+static inline bool hl_device_supports_nvme(struct hl_device *hdev) { return false; }
+#endif
+
+#endif /* __HL_HLDIO_H__ */
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 61472a381904..633db4bff46f 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -1837,7 +1837,12 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
 	atomic_dec(&ctx->hdev->dmabuf_export_cnt);
 	hl_ctx_put(ctx);
 
-	/* Paired with get_file() in export_dmabuf() */
+	/*
+	 * Paired with get_file() in export_dmabuf().
+	 * 'ctx' can still be used here to get the file pointer, even after hl_ctx_put() was called,
+	 * because releasing the compute device file involves another reference decrement, and it
+	 * would be possible only after calling fput().
+	 */
 	fput(ctx->hpriv->file_priv->filp);
 
 	kfree(hl_dmabuf);
@@ -2332,7 +2337,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
 		if (rc < 0)
 			goto destroy_pages;
 		npages = rc;
-		rc = -EFAULT;
+		rc = -ENOMEM;
 		goto put_pages;
 	}
 	userptr->npages = npages;
diff --git a/drivers/accel/habanalabs/common/memory_mgr.c b/drivers/accel/habanalabs/common/memory_mgr.c
index 99cd83139d46..4401beb99e42 100644
--- a/drivers/accel/habanalabs/common/memory_mgr.c
+++ b/drivers/accel/habanalabs/common/memory_mgr.c
@@ -259,13 +259,8 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
 		goto put_mem;
 	}
 
-#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
-	if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
-			user_mem_size)) {
-#else
 	if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
 			user_mem_size)) {
-#endif
 		dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n",
 			buf->behavior->topic, vma->vm_start);
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 82f66520ec18..8f55ba3b4e73 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -96,14 +96,21 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
 	infineon_second_stage_third_instance =
 		(infineon_second_stage_version >> 16) & mask;
 
-	if (cpucp_info->infineon_second_stage_version)
+	if (cpucp_info->infineon_version && cpucp_info->infineon_second_stage_version)
 		return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
 			le32_to_cpu(cpucp_info->infineon_version),
 			infineon_second_stage_first_instance,
 			infineon_second_stage_second_instance,
 			infineon_second_stage_third_instance);
-	else
+	else if (cpucp_info->infineon_second_stage_version)
+		return sprintf(buf, "%#04x:%#04x:%#04x\n",
+			infineon_second_stage_first_instance,
+			infineon_second_stage_second_instance,
+			infineon_second_stage_third_instance);
+	else if (cpucp_info->infineon_version)
 		return sprintf(buf, "%#04x\n",
 			le32_to_cpu(cpucp_info->infineon_version));
+
+	return 0;
 }
 
 static DEVICE_ATTR_RO(vrm_ver);