summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/vfio')
-rw-r--r--tools/testing/selftests/vfio/.gitignore10
-rw-r--r--tools/testing/selftests/vfio/Makefile21
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c416
l---------tools/testing/selftests/vfio/lib/drivers/dsa/registers.h1
l---------tools/testing/selftests/vfio/lib/drivers/ioat/hw.h1
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c235
l---------tools/testing/selftests/vfio/lib/drivers/ioat/registers.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/vfio_util.h295
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk24
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c594
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c126
-rwxr-xr-xtools/testing/selftests/vfio/run.sh109
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c199
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c127
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c176
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c244
16 files changed, 2579 insertions, 0 deletions
diff --git a/tools/testing/selftests/vfio/.gitignore b/tools/testing/selftests/vfio/.gitignore
new file mode 100644
index 000000000000..7fadc19d3bca
--- /dev/null
+++ b/tools/testing/selftests/vfio/.gitignore
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+*
+!/**/
+!*.c
+!*.h
+!*.S
+!*.sh
+!*.mk
+!.gitignore
+!Makefile
diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
new file mode 100644
index 000000000000..324ba0175a33
--- /dev/null
+++ b/tools/testing/selftests/vfio/Makefile
@@ -0,0 +1,21 @@
+CFLAGS = $(KHDR_INCLUDES)
+TEST_GEN_PROGS += vfio_dma_mapping_test
+TEST_GEN_PROGS += vfio_iommufd_setup_test
+TEST_GEN_PROGS += vfio_pci_device_test
+TEST_GEN_PROGS += vfio_pci_driver_test
+TEST_PROGS_EXTENDED := run.sh
+include ../lib.mk
+include lib/libvfio.mk
+
+CFLAGS += -I$(top_srcdir)/tools/include
+CFLAGS += -MD
+CFLAGS += $(EXTRA_CFLAGS)
+
+$(TEST_GEN_PROGS): %: %.o $(LIBVFIO_O)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(LIBVFIO_O) $(LDLIBS) -o $@
+
+TEST_GEN_PROGS_O = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O))
+-include $(TEST_DEP_FILES)
+
+EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES)
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
new file mode 100644
index 000000000000..0ca2cbc2a316
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/bits.h>
+#include <linux/errno.h>
+#include <linux/idxd.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <vfio_util.h>
+
+#include "registers.h"
+
+/* Vectors 1+ are available for work queue completion interrupts. */
+#define MSIX_VECTOR 1
+
+struct dsa_state {
+ /* Descriptors for copy and batch operations. */
+ struct dsa_hw_desc batch[32];
+ struct dsa_hw_desc copy[1024];
+
+ /* Completion records for copy and batch operations. */
+ struct dsa_completion_record copy_completion;
+ struct dsa_completion_record batch_completion;
+
+ /* Cached device registers (and derived data) for easy access */
+ union gen_cap_reg gen_cap;
+ union wq_cap_reg wq_cap;
+ union group_cap_reg group_cap;
+ union engine_cap_reg engine_cap;
+ union offsets_reg table_offsets;
+ void *wqcfg_table;
+ void *grpcfg_table;
+ u64 max_batches;
+ u64 max_copies_per_batch;
+
+ /* The number of ongoing memcpy operations. */
+ u64 memcpy_count;
+
+ /* Buffers used by dsa_send_msi() to generate an interrupt */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static bool dsa_int_handle_request_required(struct vfio_pci_device *device)
+{
+ void *bar0 = device->bars[0].vaddr;
+ union gen_cap_reg gen_cap;
+ u32 cmd_cap;
+
+ gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ if (!gen_cap.cmd_cap)
+ return false;
+
+ cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET);
+ return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1;
+}
+
+static int dsa_probe(struct vfio_pci_device *device)
+{
+ if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_DSA_SPR0))
+ return -EINVAL;
+
+ if (dsa_int_handle_request_required(device)) {
+ printf("Device requires requesting interrupt handles\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void dsa_check_sw_err(struct vfio_pci_device *device)
+{
+ void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET;
+ union sw_err_reg err = {};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err.bits); i++) {
+ err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i]));
+
+ /* No errors */
+ if (i == 0 && !err.valid)
+ return;
+ }
+
+ fprintf(stderr, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
+ err.bits[0], err.bits[1], err.bits[2], err.bits[3]);
+
+ fprintf(stderr, " valid: 0x%x\n", err.valid);
+ fprintf(stderr, " overflow: 0x%x\n", err.overflow);
+ fprintf(stderr, " desc_valid: 0x%x\n", err.desc_valid);
+ fprintf(stderr, " wq_idx_valid: 0x%x\n", err.wq_idx_valid);
+ fprintf(stderr, " batch: 0x%x\n", err.batch);
+ fprintf(stderr, " fault_rw: 0x%x\n", err.fault_rw);
+ fprintf(stderr, " priv: 0x%x\n", err.priv);
+ fprintf(stderr, " error: 0x%x\n", err.error);
+ fprintf(stderr, " wq_idx: 0x%x\n", err.wq_idx);
+ fprintf(stderr, " operation: 0x%x\n", err.operation);
+ fprintf(stderr, " pasid: 0x%x\n", err.pasid);
+ fprintf(stderr, " batch_idx: 0x%x\n", err.batch_idx);
+ fprintf(stderr, " invalid_flags: 0x%x\n", err.invalid_flags);
+ fprintf(stderr, " fault_addr: 0x%lx\n", err.fault_addr);
+
+ VFIO_FAIL("Software Error Detected!\n");
+}
+
+static void dsa_command(struct vfio_pci_device *device, u32 cmd)
+{
+ union idxd_command_reg cmd_reg = { .cmd = cmd };
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ void *bar0 = device->bars[0].vaddr;
+ u32 status;
+ u8 err;
+
+ writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET);
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = readl(bar0 + IDXD_CMDSTS_OFFSET);
+ if (!(status & IDXD_CMDSTS_ACTIVE))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ err = status & IDXD_CMDSTS_ERR_MASK;
+ VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err);
+}
+
+static void dsa_wq_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union wq_cap_reg wq_cap = dsa->wq_cap;
+ union wqcfg wqcfg;
+ u64 wqcfg_size;
+ int i;
+
+ VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0);
+
+ wqcfg = (union wqcfg) {
+ .wq_size = wq_cap.total_wq_size,
+ .mode = 1,
+ .priority = 1,
+ /*
+ * Disable Address Translation Service (if enabled) so that VFIO
+ * selftests using this driver can generate I/O page faults.
+ */
+ .wq_ats_disable = wq_cap.wq_ats_support,
+ .max_xfer_shift = dsa->gen_cap.max_xfer_shift,
+ .max_batch_shift = dsa->gen_cap.max_batch_shift,
+ .op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH),
+ };
+
+ wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
+
+ for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++)
+ writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, bits[i]));
+}
+
+static void dsa_group_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union group_cap_reg group_cap = dsa->group_cap;
+ union engine_cap_reg engine_cap = dsa->engine_cap;
+
+ VFIO_ASSERT_GT((u32)group_cap.num_groups, 0);
+ VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0);
+
+ /* Assign work queue 0 and engine 0 to group 0 */
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0]));
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines));
+}
+
+static void dsa_register_cache_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ void *bar0 = device->bars[0].vaddr;
+
+ dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET);
+ dsa->group_cap.bits = readq(bar0 + IDXD_GRPCAP_OFFSET);
+ dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET);
+
+ dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET);
+ dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8);
+
+ dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT;
+ dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT;
+
+ dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN);
+ dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch));
+
+ dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift;
+ dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, ARRAY_SIZE(dsa->copy));
+}
+
+static void dsa_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+
+ dsa_register_cache_init(device);
+ dsa_wq_init(device);
+ dsa_group_init(device);
+
+ dsa_command(device, IDXD_CMD_ENABLE_DEVICE);
+ dsa_command(device, IDXD_CMD_ENABLE_WQ);
+
+ vfio_pci_msix_enable(device, MSIX_VECTOR, 1);
+
+ device->driver.max_memcpy_count =
+ dsa->max_batches * dsa->max_copies_per_batch;
+ device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift;
+ device->driver.msi = MSIX_VECTOR;
+}
+
+static void dsa_remove(struct vfio_pci_device *device)
+{
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+ vfio_pci_msix_disable(device);
+}
+
+static int dsa_completion_wait(struct vfio_pci_device *device,
+ struct dsa_completion_record *completion)
+{
+ u8 status;
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = READ_ONCE(completion->status);
+ if (status)
+ break;
+
+ usleep(1000);
+ }
+
+ if (status == DSA_COMP_SUCCESS)
+ return 0;
+
+ printf("Error detected during memcpy operation: 0x%x\n", status);
+ return -1;
+}
+
+static void dsa_copy_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ iova_t src, iova_t dst, u64 size,
+ bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ u16 flags;
+
+ flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR;
+
+ if (interrupt)
+ flags |= IDXD_OP_FLAG_RCI;
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_MEMMOVE,
+ .flags = flags,
+ .priv = 1,
+ .src_addr = src,
+ .dst_addr = dst,
+ .xfer_size = size,
+ .completion_addr = to_iova(device, &dsa->copy_completion),
+ .int_handle = interrupt ? MSIX_VECTOR : 0,
+ };
+}
+
+static void dsa_batch_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_BATCH,
+ .flags = IDXD_OP_FLAG_CRAV,
+ .priv = 1,
+ .completion_addr = to_iova(device, &dsa->batch_completion),
+ .desc_list_addr = to_iova(device, &dsa->copy[0]),
+ .desc_count = count,
+ };
+}
+
+static void dsa_desc_write(struct vfio_pci_device *device, struct dsa_hw_desc *desc)
+{
+ /* Write the contents (not address) of the 64-byte descriptor to the device. */
+ iosubmit_cmds512(device->bars[2].vaddr, desc, 1);
+}
+
+static void dsa_memcpy_one(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion));
+
+ dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt);
+ dsa_desc_write(device, &dsa->copy[0]);
+}
+
+static void dsa_memcpy_batch(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int i;
+
+ memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion));
+
+ for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) {
+ struct dsa_hw_desc *copy_desc = &dsa->copy[i];
+
+ dsa_copy_desc_init(device, copy_desc, src, dst, size, false);
+
+ /* Don't request completions for individual copies. */
+ copy_desc->flags &= ~IDXD_OP_FLAG_RCR;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) {
+ struct dsa_hw_desc *batch_desc = &dsa->batch[i];
+ int nr_copies;
+
+ nr_copies = min(count, dsa->max_copies_per_batch);
+ count -= nr_copies;
+
+ /*
+ * Batches must have at least 2 copies, so handle the case where
+ * there is exactly 1 copy left by doing one less copy in this
+ * batch and then 2 in the next.
+ */
+ if (count == 1) {
+ nr_copies--;
+ count++;
+ }
+
+ dsa_batch_desc_init(device, batch_desc, nr_copies);
+
+ /* Request a completion for the last batch. */
+ if (!count)
+ batch_desc->flags |= IDXD_OP_FLAG_RCR;
+
+ dsa_desc_write(device, batch_desc);
+ }
+
+ VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count);
+}
+
+static void dsa_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ /* DSA devices require at least 2 copies per batch. */
+ if (count == 1)
+ dsa_memcpy_one(device, src, dst, size, false);
+ else
+ dsa_memcpy_batch(device, src, dst, size, count);
+
+ dsa->memcpy_count = count;
+}
+
+static int dsa_memcpy_wait(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int r;
+
+ if (dsa->memcpy_count == 1)
+ r = dsa_completion_wait(device, &dsa->copy_completion);
+ else
+ r = dsa_completion_wait(device, &dsa->batch_completion);
+
+ dsa->memcpy_count = 0;
+
+ return r;
+}
+
+static void dsa_send_msi(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ dsa_memcpy_one(device,
+ to_iova(device, &dsa->send_msi_src),
+ to_iova(device, &dsa->send_msi_dst),
+ sizeof(dsa->send_msi_src), true);
+
+ VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0);
+}
+
+const struct vfio_pci_driver_ops dsa_ops = {
+ .name = "dsa",
+ .probe = dsa_probe,
+ .init = dsa_init,
+ .remove = dsa_remove,
+ .memcpy_start = dsa_memcpy_start,
+ .memcpy_wait = dsa_memcpy_wait,
+ .send_msi = dsa_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
new file mode 120000
index 000000000000..bde657c3c2af
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/idxd/registers.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
new file mode 120000
index 000000000000..8ab52ddd4458
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/hw.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
new file mode 100644
index 000000000000..c3b91d9b1f59
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <vfio_util.h>
+
+#include "hw.h"
+#include "registers.h"
+
+#define IOAT_DMACOUNT_MAX UINT16_MAX
+
+struct ioat_state {
+ /* Single descriptor used to issue DMA memcpy operations */
+ struct ioat_dma_descriptor desc;
+
+ /* Copy buffers used by ioat_send_msi() to generate an interrupt. */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct ioat_state *to_ioat_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static inline void *ioat_channel_registers(struct vfio_pci_device *device)
+{
+ return device->bars[0].vaddr + IOAT_CHANNEL_MMIO_SIZE;
+}
+
+static int ioat_probe(struct vfio_pci_device *device)
+{
+ u8 version;
+ int r;
+
+ if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_IOAT_SKX))
+ return -EINVAL;
+
+ VFIO_ASSERT_NOT_NULL(device->bars[0].vaddr);
+
+ version = readb(device->bars[0].vaddr + IOAT_VER_OFFSET);
+ switch (version) {
+ case IOAT_VER_3_2:
+ case IOAT_VER_3_3:
+ r = 0;
+ break;
+ default:
+ printf("ioat: Unsupported version: 0x%x\n", version);
+ r = -EINVAL;
+ }
+ return r;
+}
+
+static u64 ioat_channel_status(void *bar)
+{
+ return readq(bar + IOAT_CHANSTS_OFFSET) & IOAT_CHANSTS_STATUS;
+}
+
+static void ioat_clear_errors(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 errors;
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors);
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors);
+
+ errors = readl(registers + IOAT_CHANERR_OFFSET);
+ writel(errors, registers + IOAT_CHANERR_OFFSET);
+}
+
+static void ioat_reset(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ u8 chancmd;
+
+ ioat_clear_errors(device);
+
+ writeb(IOAT_CHANCMD_RESET, registers + IOAT2_CHANCMD_OFFSET);
+
+ for (;;) {
+ chancmd = readb(registers + IOAT2_CHANCMD_OFFSET);
+ if (!(chancmd & IOAT_CHANCMD_RESET))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ VFIO_ASSERT_EQ(ioat_channel_status(registers), IOAT_CHANSTS_HALTED);
+}
+
+static void ioat_init(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+ u8 intrctrl;
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*ioat));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ ioat_reset(device);
+
+ /* Enable the use of MXI-x interrupts for channel interrupts. */
+ intrctrl = IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ writeb(intrctrl, device->bars[0].vaddr + IOAT_INTRCTRL_OFFSET);
+
+ vfio_pci_msix_enable(device, 0, device->msix_info.count);
+
+ device->driver.msi = 0;
+ device->driver.max_memcpy_size =
+ 1UL << readb(device->bars[0].vaddr + IOAT_XFERCAP_OFFSET);
+ device->driver.max_memcpy_count = IOAT_DMACOUNT_MAX;
+}
+
+static void ioat_remove(struct vfio_pci_device *device)
+{
+ ioat_reset(device);
+ vfio_pci_msix_disable(device);
+}
+
+static void ioat_handle_error(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+
+ printf("Error detected during memcpy operation!\n"
+ " CHANERR: 0x%x\n"
+ " CHANERR_INT: 0x%x\n"
+ " DMAUNCERRSTS: 0x%x\n",
+ readl(registers + IOAT_CHANERR_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET));
+
+ ioat_reset(device);
+}
+
+static int ioat_memcpy_wait(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u64 status;
+ int r = 0;
+
+ /* Wait until all operations complete. */
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_DONE)
+ break;
+
+ if (status == IOAT_CHANSTS_HALTED) {
+ ioat_handle_error(device);
+ return -1;
+ }
+ }
+
+ /* Put the channel into the SUSPENDED state. */
+ writeb(IOAT_CHANCMD_SUSPEND, registers + IOAT2_CHANCMD_OFFSET);
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_SUSPENDED)
+ break;
+ }
+
+ return r;
+}
+
+static void __ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u16 count, bool interrupt)
+{
+ void *registers = ioat_channel_registers(device);
+ struct ioat_state *ioat = to_ioat_state(device);
+ u64 desc_iova;
+ u16 chanctrl;
+
+ desc_iova = to_iova(device, &ioat->desc);
+ ioat->desc = (struct ioat_dma_descriptor) {
+ .ctl_f.op = IOAT_OP_COPY,
+ .ctl_f.int_en = interrupt,
+ .src_addr = src,
+ .dst_addr = dst,
+ .size = size,
+ .next = desc_iova,
+ };
+
+ /* Tell the device the address of the descriptor. */
+ writeq(desc_iova, registers + IOAT2_CHAINADDR_OFFSET);
+
+ /* (Re)Enable the channel interrupt and abort on any errors */
+ chanctrl = IOAT_CHANCTRL_INT_REARM | IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
+ writew(chanctrl, registers + IOAT_CHANCTRL_OFFSET);
+
+ /* Kick off @count DMA copy operation(s). */
+ writew(count, registers + IOAT_CHAN_DMACOUNT_OFFSET);
+}
+
+static void ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count)
+{
+ __ioat_memcpy_start(device, src, dst, size, count, false);
+}
+
+static void ioat_send_msi(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+
+ __ioat_memcpy_start(device,
+ to_iova(device, &ioat->send_msi_src),
+ to_iova(device, &ioat->send_msi_dst),
+ sizeof(ioat->send_msi_src), 1, true);
+
+ VFIO_ASSERT_EQ(ioat_memcpy_wait(device), 0);
+}
+
+const struct vfio_pci_driver_ops ioat_ops = {
+ .name = "ioat",
+ .probe = ioat_probe,
+ .init = ioat_init,
+ .remove = ioat_remove,
+ .memcpy_start = ioat_memcpy_start,
+ .memcpy_wait = ioat_memcpy_wait,
+ .send_msi = ioat_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
new file mode 120000
index 000000000000..0b809cfd8fe6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/registers.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
new file mode 100644
index 000000000000..ed31606e01b7
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H
+
+#include <fcntl.h>
+#include <string.h>
+#include <linux/vfio.h>
+#include <linux/list.h>
+#include <linux/pci_regs.h>
+
+#include "../../../kselftest.h"
+
+#define VFIO_LOG_AND_EXIT(...) do { \
+ fprintf(stderr, " " __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ exit(KSFT_FAIL); \
+} while (0)
+
+#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \
+ typeof(_lhs) __lhs = (_lhs); \
+ typeof(_rhs) __rhs = (_rhs); \
+ \
+ if (__lhs _op __rhs) \
+ break; \
+ \
+ fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \
+ fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \
+ fprintf(stderr, " Observed: %#lx %s %#lx\n", \
+ (u64)__lhs, #_op, (u64)__rhs); \
+ fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \
+ VFIO_LOG_AND_EXIT(__VA_ARGS__); \
+} while (0)
+
+#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__)
+#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__)
+#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__)
+#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__)
+#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__)
+#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__)
+#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__)
+#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__)
+
+#define VFIO_FAIL(_fmt, ...) do { \
+ fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \
+ VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \
+} while (0)
+
+struct vfio_iommu_mode {
+ const char *name;
+ const char *container_path;
+ unsigned long iommu_type;
+};
+
+/*
+ * Generator for VFIO selftests fixture variants that replicate across all
+ * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
+ * which should then use FIXTURE_VARIANT_ADD() to create the variant.
+ */
+#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__)
+
+struct vfio_pci_bar {
+ struct vfio_region_info info;
+ void *vaddr;
+};
+
+typedef u64 iova_t;
+
+#define INVALID_IOVA UINT64_MAX
+
+struct vfio_dma_region {
+ struct list_head link;
+ void *vaddr;
+ iova_t iova;
+ u64 size;
+};
+
+struct vfio_pci_device;
+
+struct vfio_pci_driver_ops {
+ const char *name;
+
+ /**
+ * @probe() - Check if the driver supports the given device.
+ *
+ * Return: 0 on success, non-0 on failure.
+ */
+ int (*probe)(struct vfio_pci_device *device);
+
+ /**
+ * @init() - Initialize the driver for @device.
+ *
+ * Must be called after device->driver.region has been initialized.
+ */
+ void (*init)(struct vfio_pci_device *device);
+
+ /**
+ * remove() - Deinitialize the driver for @device.
+ */
+ void (*remove)(struct vfio_pci_device *device);
+
+ /**
+ * memcpy_start() - Kick off @count repeated memcpy operations from
+ * [@src, @src + @size) to [@dst, @dst + @size).
+ *
+ * Guarantees:
+ * - The device will attempt DMA reads on [src, src + size).
+ * - The device will attempt DMA writes on [dst, dst + size).
+ * - The device will not generate any interrupts.
+ *
+ * memcpy_start() returns immediately, it does not wait for the
+ * copies to complete.
+ */
+ void (*memcpy_start)(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count);
+
+ /**
+ * memcpy_wait() - Wait until the memcpy operations started by
+ * memcpy_start() have finished.
+ *
+ * Guarantees:
+ * - All in-flight DMAs initiated by memcpy_start() are fully complete
+ * before memcpy_wait() returns.
+ *
+ * Returns non-0 if the driver detects that an error occurred during the
+ * memcpy, 0 otherwise.
+ */
+ int (*memcpy_wait)(struct vfio_pci_device *device);
+
+ /**
+ * send_msi() - Make the device send the MSI device->driver.msi.
+ *
+ * Guarantees:
+ * - The device will send the MSI once.
+ */
+ void (*send_msi)(struct vfio_pci_device *device);
+};
+
+struct vfio_pci_driver {
+ const struct vfio_pci_driver_ops *ops;
+ bool initialized;
+ bool memcpy_in_progress;
+
+ /* Region to be used by the driver (e.g. for in-memory descriptors) */
+ struct vfio_dma_region region;
+
+ /* The maximum size that can be passed to memcpy_start(). */
+ u64 max_memcpy_size;
+
+ /* The maximum count that can be passed to memcpy_start(). */
+ u64 max_memcpy_count;
+
+ /* The MSI vector the device will signal in ops->send_msi(). */
+ int msi;
+};
+
+struct vfio_pci_device {
+ int fd;
+
+ const struct vfio_iommu_mode *iommu_mode;
+ int group_fd;
+ int container_fd;
+
+ int iommufd;
+ u32 ioas_id;
+
+ struct vfio_device_info info;
+ struct vfio_region_info config_space;
+ struct vfio_pci_bar bars[PCI_STD_NUM_BARS];
+
+ struct vfio_irq_info msi_info;
+ struct vfio_irq_info msix_info;
+
+ struct list_head dma_regions;
+
+ /* eventfds for MSI and MSI-x interrupts */
+ int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1];
+
+ struct vfio_pci_driver driver;
+};
+
+/*
+ * Return the BDF string of the device that the test should use.
+ *
+ * If a BDF string is provided by the user on the command line (as the last
+ * element of argv[]), then this function will return that and decrement argc
+ * by 1.
+ *
+ * Otherwise this function will attempt to use the environment variable
+ * $VFIO_SELFTESTS_BDF.
+ *
+ * If BDF cannot be determined then the test will exit with KSFT_SKIP.
+ */
+const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
+const char *vfio_pci_get_cdev_path(const char *bdf);
+
+extern const char *default_iommu_mode;
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode);
+void vfio_pci_device_cleanup(struct vfio_pci_device *device);
+void vfio_pci_device_reset(struct vfio_pci_device *device);
+
+void vfio_pci_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region);
+void vfio_pci_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region);
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data);
+
+#define vfio_pci_config_read(_device, _offset, _type) ({ \
+ _type __data; \
+ vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \
+ __data; \
+})
+
+#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8)
+#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16)
+#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32)
+
+#define vfio_pci_config_write(_device, _offset, _value, _type) do { \
+ _type __data = (_value); \
+ vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \
+} while (0)
+
+#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8)
+#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16)
+#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32)
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index,
+ u32 vector, int count);
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index);
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector);
+
+static inline void fcntl_set_nonblock(int fd)
+{
+ int r;
+
+ r = fcntl(fd, F_GETFL, 0);
+ VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd);
+
+ r = fcntl(fd, F_SETFL, r | O_NONBLOCK);
+ VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd);
+}
+
+static inline void vfio_pci_msi_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msi_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX);
+}
+
+static inline void vfio_pci_msix_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msix_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX);
+}
+
+iova_t __to_iova(struct vfio_pci_device *device, void *vaddr);
+iova_t to_iova(struct vfio_pci_device *device, void *vaddr);
+
+static inline bool vfio_pci_device_match(struct vfio_pci_device *device,
+ u16 vendor_id, u16 device_id)
+{
+ return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) &&
+ (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID));
+}
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device);
+void vfio_pci_driver_init(struct vfio_pci_device *device);
+void vfio_pci_driver_remove(struct vfio_pci_device *device);
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size);
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count);
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device);
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H */
diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk
new file mode 100644
index 000000000000..5d11c3a89a28
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/libvfio.mk
@@ -0,0 +1,24 @@
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
+VFIO_DIR := $(selfdir)/vfio
+
+LIBVFIO_C := lib/vfio_pci_device.c
+LIBVFIO_C += lib/vfio_pci_driver.c
+
+ifeq ($(ARCH:x86_64=x86),x86)
+LIBVFIO_C += lib/drivers/ioat/ioat.c
+LIBVFIO_C += lib/drivers/dsa/dsa.c
+endif
+
+LIBVFIO_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBVFIO_C))
+
+LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq)
+$(shell mkdir -p $(LIBVFIO_O_DIRS))
+
+CFLAGS += -I$(VFIO_DIR)/lib/include
+
+$(LIBVFIO_O): $(OUTPUT)/%.o : $(VFIO_DIR)/%.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBVFIO_O)
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
new file mode 100644
index 000000000000..0921b2451ba5
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <uapi/linux/types.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "../../../kselftest.h"
+#include <vfio_util.h>
+
+#define PCI_SYSFS_PATH "/sys/bus/pci/devices"
+
+#define ioctl_assert(_fd, _op, _arg) do { \
+ void *__arg = (_arg); \
+ int __ret = ioctl((_fd), (_op), (__arg)); \
+ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
+} while (0)
+
+iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
+{
+ struct vfio_dma_region *region;
+
+ list_for_each_entry(region, &device->dma_regions, link) {
+ if (vaddr < region->vaddr)
+ continue;
+
+ if (vaddr >= region->vaddr + region->size)
+ continue;
+
+ return region->iova + (vaddr - region->vaddr);
+ }
+
+ return INVALID_IOVA;
+}
+
+iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
+{
+ iova_t iova;
+
+ iova = __to_iova(device, vaddr);
+ VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into device.\n", vaddr);
+
+ return iova;
+}
+
+static void vfio_pci_irq_set(struct vfio_pci_device *device,
+ u32 index, u32 vector, u32 count, int *fds)
+{
+ u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {};
+ struct vfio_irq_set *irq = (void *)&buf;
+ int *irq_fds = (void *)&irq->data;
+
+ irq->argsz = sizeof(buf);
+ irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq->index = index;
+ irq->start = vector;
+ irq->count = count;
+
+ if (count) {
+ irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
+ memcpy(irq_fds, fds, sizeof(int) * count);
+ } else {
+ irq->flags |= VFIO_IRQ_SET_DATA_NONE;
+ }
+
+ ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
+}
+
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
+{
+ struct vfio_irq_set irq = {
+ .argsz = sizeof(irq),
+ .flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
+ .index = index,
+ .start = vector,
+ .count = 1,
+ };
+
+ ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
+}
+
+static void check_supported_irq_index(u32 index)
+{
+ /* VFIO selftests only supports MSI and MSI-x for now. */
+ VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
+ index == VFIO_PCI_MSIX_IRQ_INDEX,
+ "Unsupported IRQ index: %u\n", index);
+}
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
+ int count)
+{
+ int i;
+
+ check_supported_irq_index(index);
+
+ for (i = vector; i < vector + count; i++) {
+ VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
+ device->msi_eventfds[i] = eventfd(0, 0);
+ VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
+ }
+
+ vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
+}
+
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
+{
+ int i;
+
+ check_supported_irq_index(index);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
+ if (device->msi_eventfds[i] < 0)
+ continue;
+
+ VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
+ device->msi_eventfds[i] = -1;
+ }
+
+ vfio_pci_irq_set(device, index, 0, 0, NULL);
+}
+
+static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
+ struct vfio_irq_info *irq_info)
+{
+ irq_info->argsz = sizeof(*irq_info);
+ irq_info->index = index;
+
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
+}
+
+static void vfio_iommu_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct vfio_iommu_type1_dma_map args = {
+ .argsz = sizeof(args),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = (u64)region->vaddr,
+ .iova = region->iova,
+ .size = region->size,
+ };
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args);
+}
+
+static void iommufd_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct iommu_ioas_map args = {
+ .size = sizeof(args),
+ .flags = IOMMU_IOAS_MAP_READABLE |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_FIXED_IOVA,
+ .user_va = (u64)region->vaddr,
+ .iova = region->iova,
+ .length = region->size,
+ .ioas_id = device->ioas_id,
+ };
+
+ ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args);
+}
+
+void vfio_pci_dma_map(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ if (device->iommufd)
+ iommufd_dma_map(device, region);
+ else
+ vfio_iommu_dma_map(device, region);
+
+ list_add(&region->link, &device->dma_regions);
+}
+
+static void vfio_iommu_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct vfio_iommu_type1_dma_unmap args = {
+ .argsz = sizeof(args),
+ .iova = region->iova,
+ .size = region->size,
+ };
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args);
+}
+
+static void iommufd_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ struct iommu_ioas_unmap args = {
+ .size = sizeof(args),
+ .iova = region->iova,
+ .length = region->size,
+ .ioas_id = device->ioas_id,
+ };
+
+ ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args);
+}
+
+void vfio_pci_dma_unmap(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ if (device->iommufd)
+ iommufd_dma_unmap(device, region);
+ else
+ vfio_iommu_dma_unmap(device, region);
+
+ list_del(&region->link);
+}
+
+static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
+ struct vfio_region_info *info)
+{
+ memset(info, 0, sizeof(*info));
+
+ info->argsz = sizeof(*info);
+ info->index = index;
+
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
+}
+
+static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
+{
+ struct vfio_pci_bar *bar = &device->bars[index];
+ int prot = 0;
+
+ VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
+ VFIO_ASSERT_NULL(bar->vaddr);
+ VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
+
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
+ prot |= PROT_READ;
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
+ prot |= PROT_WRITE;
+
+ bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
+ device->fd, bar->info.offset);
+ VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
+}
+
+static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
+{
+ struct vfio_pci_bar *bar = &device->bars[index];
+
+ VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
+ VFIO_ASSERT_NOT_NULL(bar->vaddr);
+
+ VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
+ bar->vaddr = NULL;
+}
+
+static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
+{
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (device->bars[i].vaddr)
+ vfio_pci_bar_unmap(device, i);
+ }
+}
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data)
+{
+ struct vfio_region_info *config_space = &device->config_space;
+ int ret;
+
+ if (write)
+ ret = pwrite(device->fd, data, size, config_space->offset + config);
+ else
+ ret = pread(device->fd, data, size, config_space->offset + config);
+
+ VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
+ write ? "write to" : "read from", config);
+}
+
+void vfio_pci_device_reset(struct vfio_pci_device *device)
+{
+ ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
+}
+
+static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
+{
+ char dev_iommu_group_path[PATH_MAX] = {0};
+ char sysfs_path[PATH_MAX] = {0};
+ unsigned int group;
+ int ret;
+
+ snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);
+
+ ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path));
+ VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
+ VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ return group;
+}
+
+static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status),
+ };
+ char group_path[32];
+ int group;
+
+ group = vfio_pci_get_group_from_dev(bdf);
+ snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);
+
+ device->group_fd = open(group_path, O_RDWR);
+ VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);
+
+ ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+ VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);
+
+ ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd);
+}
+
+static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ unsigned long iommu_type = device->iommu_mode->iommu_type;
+ const char *path = device->iommu_mode->container_path;
+ int version;
+ int ret;
+
+ device->container_fd = open(path, O_RDWR);
+ VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path);
+
+ version = ioctl(device->container_fd, VFIO_GET_API_VERSION);
+ VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
+
+ vfio_pci_group_setup(device, bdf);
+
+ ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
+ VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);
+
+ ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
+
+ device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
+ VFIO_ASSERT_GE(device->fd, 0);
+}
+
+static void vfio_pci_device_setup(struct vfio_pci_device *device)
+{
+ int i;
+
+ device->info.argsz = sizeof(device->info);
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);
+
+ vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);
+
+ /* Sanity check VFIO does not advertise mmap for config space */
+ VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
+ "PCI config space should not support mmap()\n");
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ struct vfio_pci_bar *bar = device->bars + i;
+
+ vfio_pci_region_get(device, i, &bar->info);
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
+ vfio_pci_bar_map(device, i);
+ }
+
+ vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
+ vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
+ device->msi_eventfds[i] = -1;
+}
+
+const char *vfio_pci_get_cdev_path(const char *bdf)
+{
+ char dir_path[PATH_MAX];
+ struct dirent *entry;
+ char *cdev_path;
+ DIR *dir;
+
+ cdev_path = calloc(PATH_MAX, 1);
+ VFIO_ASSERT_NOT_NULL(cdev_path);
+
+ snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);
+
+ dir = opendir(dir_path);
+ VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);
+
+ while ((entry = readdir(dir)) != NULL) {
+ /* Find the file that starts with "vfio" */
+ if (strncmp("vfio", entry->d_name, 4))
+ continue;
+
+ snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
+ break;
+ }
+
+ VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
+ VFIO_ASSERT_EQ(closedir(dir), 0);
+
+ return cdev_path;
+}
+
+/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
+static const struct vfio_iommu_mode iommu_modes[] = {
+ {
+ .name = "vfio_type1_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "vfio_type1v2_iommu",
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = "iommufd_compat_type1v2",
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = "iommufd",
+ },
+};
+
+const char *default_iommu_mode = "iommufd";
+
+static const struct vfio_iommu_mode *lookup_iommu_mode(const char *iommu_mode)
+{
+ int i;
+
+ if (!iommu_mode)
+ iommu_mode = default_iommu_mode;
+
+ for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
+ if (strcmp(iommu_mode, iommu_modes[i].name))
+ continue;
+
+ return &iommu_modes[i];
+ }
+
+ VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
+}
+
+static void vfio_device_bind_iommufd(int device_fd, int iommufd)
+{
+ struct vfio_device_bind_iommufd args = {
+ .argsz = sizeof(args),
+ .iommufd = iommufd,
+ };
+
+ ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
+}
+
+static u32 iommufd_ioas_alloc(int iommufd)
+{
+ struct iommu_ioas_alloc args = {
+ .size = sizeof(args),
+ };
+
+ ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
+ return args.out_ioas_id;
+}
+
+static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
+{
+ struct vfio_device_attach_iommufd_pt args = {
+ .argsz = sizeof(args),
+ .pt_id = pt_id,
+ };
+
+ ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
+}
+
+static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ const char *cdev_path = vfio_pci_get_cdev_path(bdf);
+
+ device->fd = open(cdev_path, O_RDWR);
+ VFIO_ASSERT_GE(device->fd, 0);
+ free((void *)cdev_path);
+
+ /*
+ * Require device->iommufd to be >0 so that a simple non-0 check can be
+ * used to check if iommufd is enabled. In practice open() will never
+ * return 0 unless stdin is closed.
+ */
+ device->iommufd = open("/dev/iommu", O_RDWR);
+ VFIO_ASSERT_GT(device->iommufd, 0);
+
+ vfio_device_bind_iommufd(device->fd, device->iommufd);
+ device->ioas_id = iommufd_ioas_alloc(device->iommufd);
+ vfio_device_attach_iommufd_pt(device->fd, device->ioas_id);
+}
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode)
+{
+ struct vfio_pci_device *device;
+
+ device = calloc(1, sizeof(*device));
+ VFIO_ASSERT_NOT_NULL(device);
+
+ INIT_LIST_HEAD(&device->dma_regions);
+
+ device->iommu_mode = lookup_iommu_mode(iommu_mode);
+
+ if (device->iommu_mode->container_path)
+ vfio_pci_container_setup(device, bdf);
+ else
+ vfio_pci_iommufd_setup(device, bdf);
+
+ vfio_pci_device_setup(device);
+ vfio_pci_driver_probe(device);
+
+ return device;
+}
+
+void vfio_pci_device_cleanup(struct vfio_pci_device *device)
+{
+ int i;
+
+ if (device->driver.initialized)
+ vfio_pci_driver_remove(device);
+
+ vfio_pci_bar_unmap_all(device);
+
+ VFIO_ASSERT_EQ(close(device->fd), 0);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
+ if (device->msi_eventfds[i] < 0)
+ continue;
+
+ VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
+ }
+
+ if (device->iommufd) {
+ VFIO_ASSERT_EQ(close(device->iommufd), 0);
+ } else {
+ VFIO_ASSERT_EQ(close(device->group_fd), 0);
+ VFIO_ASSERT_EQ(close(device->container_fd), 0);
+ }
+
+ free(device);
+}
+
+static bool is_bdf(const char *str)
+{
+ unsigned int s, b, d, f;
+ int length, count;
+
+ count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
+ return count == 4 && length == strlen(str);
+}
+
+const char *vfio_selftests_get_bdf(int *argc, char *argv[])
+{
+ char *bdf;
+
+ if (*argc > 1 && is_bdf(argv[*argc - 1]))
+ return argv[--(*argc)];
+
+ bdf = getenv("VFIO_SELFTESTS_BDF");
+ if (bdf) {
+ VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
+ return bdf;
+ }
+
+ fprintf(stderr, "Unable to determine which device to use, skipping test.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address via environment variable:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " export VFIO_SELFTESTS_BDF=segment:bus:device.function\n");
+ fprintf(stderr, " %s [options]\n", argv[0]);
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address via argv:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " %s [options] segment:bus:device.function\n", argv[0]);
+ fprintf(stderr, "\n");
+ exit(KSFT_SKIP);
+}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
new file mode 100644
index 000000000000..e5e8723ecb41
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+
+#include "../../../kselftest.h"
+#include <vfio_util.h>
+
+#ifdef __x86_64__
+extern struct vfio_pci_driver_ops dsa_ops;
+extern struct vfio_pci_driver_ops ioat_ops;
+#endif
+
+static struct vfio_pci_driver_ops *driver_ops[] = {
+#ifdef __x86_64__
+ &dsa_ops,
+ &ioat_ops,
+#endif
+};
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver_ops *ops;
+ int i;
+
+ VFIO_ASSERT_NULL(device->driver.ops);
+
+ for (i = 0; i < ARRAY_SIZE(driver_ops); i++) {
+ ops = driver_ops[i];
+
+ if (ops->probe(device))
+ continue;
+
+ printf("Driver found: %s\n", ops->name);
+ device->driver.ops = ops;
+ }
+}
+
+static void vfio_check_driver_op(struct vfio_pci_driver *driver, void *op,
+ const char *op_name)
+{
+ VFIO_ASSERT_NOT_NULL(driver->ops);
+ VFIO_ASSERT_NOT_NULL(op, "Driver has no %s()\n", op_name);
+ VFIO_ASSERT_EQ(driver->initialized, op != driver->ops->init);
+ VFIO_ASSERT_EQ(driver->memcpy_in_progress, op == driver->ops->memcpy_wait);
+}
+
+#define VFIO_CHECK_DRIVER_OP(_driver, _op) do { \
+ struct vfio_pci_driver *__driver = (_driver); \
+ vfio_check_driver_op(__driver, __driver->ops->_op, #_op); \
+} while (0)
+
+void vfio_pci_driver_init(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_ASSERT_NOT_NULL(driver->region.vaddr);
+ VFIO_CHECK_DRIVER_OP(driver, init);
+
+ driver->ops->init(device);
+
+ driver->initialized = true;
+
+ printf("%s: region: vaddr %p, iova 0x%lx, size 0x%lx\n",
+ driver->ops->name,
+ driver->region.vaddr,
+ driver->region.iova,
+ driver->region.size);
+
+ printf("%s: max_memcpy_size 0x%lx, max_memcpy_count 0x%lx\n",
+ driver->ops->name,
+ driver->max_memcpy_size,
+ driver->max_memcpy_count);
+}
+
+void vfio_pci_driver_remove(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_CHECK_DRIVER_OP(driver, remove);
+
+ driver->ops->remove(device);
+ driver->initialized = false;
+}
+
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_CHECK_DRIVER_OP(driver, send_msi);
+
+ driver->ops->send_msi(device);
+}
+
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_ASSERT_LE(size, driver->max_memcpy_size);
+ VFIO_ASSERT_LE(count, driver->max_memcpy_count);
+ VFIO_CHECK_DRIVER_OP(driver, memcpy_start);
+
+ driver->ops->memcpy_start(device, src, dst, size, count);
+ driver->memcpy_in_progress = true;
+}
+
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+ int r;
+
+ VFIO_CHECK_DRIVER_OP(driver, memcpy_wait);
+
+ r = driver->ops->memcpy_wait(device);
+ driver->memcpy_in_progress = false;
+
+ return r;
+}
+
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size)
+{
+ vfio_pci_driver_memcpy_start(device, src, dst, size, 1);
+
+ return vfio_pci_driver_memcpy_wait(device);
+}
diff --git a/tools/testing/selftests/vfio/run.sh b/tools/testing/selftests/vfio/run.sh
new file mode 100755
index 000000000000..0476b6d7adc3
--- /dev/null
+++ b/tools/testing/selftests/vfio/run.sh
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# Global variables initialized in main() and then used during cleanup() when
+# the script exits.
+declare DEVICE_BDF
+declare NEW_DRIVER
+declare OLD_DRIVER
+declare OLD_NUMVFS
+declare DRIVER_OVERRIDE
+
+function write_to() {
+ # Unfortunately set -x does not show redirects so use echo to manually
+ # tell the user what commands are being run.
+ echo "+ echo \"${2}\" > ${1}"
+ echo "${2}" > ${1}
+}
+
+function bind() {
+ write_to /sys/bus/pci/drivers/${2}/bind ${1}
+}
+
+function unbind() {
+ write_to /sys/bus/pci/drivers/${2}/unbind ${1}
+}
+
+function set_sriov_numvfs() {
+ write_to /sys/bus/pci/devices/${1}/sriov_numvfs ${2}
+}
+
+function set_driver_override() {
+ write_to /sys/bus/pci/devices/${1}/driver_override ${2}
+}
+
+function clear_driver_override() {
+ set_driver_override ${1} ""
+}
+
+function cleanup() {
+ if [ "${NEW_DRIVER}" ]; then unbind ${DEVICE_BDF} ${NEW_DRIVER} ; fi
+ if [ "${DRIVER_OVERRIDE}" ]; then clear_driver_override ${DEVICE_BDF} ; fi
+ if [ "${OLD_DRIVER}" ]; then bind ${DEVICE_BDF} ${OLD_DRIVER} ; fi
+ if [ "${OLD_NUMVFS}" ]; then set_sriov_numvfs ${DEVICE_BDF} ${OLD_NUMVFS} ; fi
+}
+
+function usage() {
+ echo "usage: $0 [-d segment:bus:device.function] [-s] [-h] [cmd ...]" >&2
+ echo >&2
+ echo " -d: The BDF of the device to use for the test (required)" >&2
+ echo " -h: Show this help message" >&2
+ echo " -s: Drop into a shell rather than running a command" >&2
+ echo >&2
+ echo " cmd: The command to run and arguments to pass to it." >&2
+ echo " Required when not using -s. The SBDF will be " >&2
+ echo " appended to the argument list." >&2
+ exit 1
+}
+
+function main() {
+ local shell
+
+ while getopts "d:hs" opt; do
+ case $opt in
+ d) DEVICE_BDF="$OPTARG" ;;
+ s) shell=true ;;
+ *) usage ;;
+ esac
+ done
+
+ # Shift past all optional arguments.
+ shift $((OPTIND - 1))
+
+ # Check that the user passed in the command to run.
+ [ ! "${shell}" ] && [ $# = 0 ] && usage
+
+ # Check that the user passed in a BDF.
+ [ "${DEVICE_BDF}" ] || usage
+
+ trap cleanup EXIT
+ set -e
+
+ test -d /sys/bus/pci/devices/${DEVICE_BDF}
+
+ if [ -f /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs ]; then
+ OLD_NUMVFS=$(cat /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs)
+ set_sriov_numvfs ${DEVICE_BDF} 0
+ fi
+
+ if [ -L /sys/bus/pci/devices/${DEVICE_BDF}/driver ]; then
+ OLD_DRIVER=$(basename $(readlink -m /sys/bus/pci/devices/${DEVICE_BDF}/driver))
+ unbind ${DEVICE_BDF} ${OLD_DRIVER}
+ fi
+
+ set_driver_override ${DEVICE_BDF} vfio-pci
+ DRIVER_OVERRIDE=true
+
+ bind ${DEVICE_BDF} vfio-pci
+ NEW_DRIVER=vfio-pci
+
+ echo
+ if [ "${shell}" ]; then
+ echo "Dropping into ${SHELL} with VFIO_SELFTESTS_BDF=${DEVICE_BDF}"
+ VFIO_SELFTESTS_BDF=${DEVICE_BDF} ${SHELL}
+ else
+ "$@" ${DEVICE_BDF}
+ fi
+ echo
+}
+
+main "$@"
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
new file mode 100644
index 000000000000..ab19c54a774d
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <vfio_util.h>
+
+#include "../kselftest_harness.h"
+
+static const char *device_bdf;
+
+struct iommu_mapping {
+ u64 pgd;
+ u64 p4d;
+ u64 pud;
+ u64 pmd;
+ u64 pte;
+};
+
+static void parse_next_value(char **line, u64 *value)
+{
+ char *token;
+
+ token = strtok_r(*line, " \t|\n", line);
+ if (!token)
+ return;
+
+ /* Caller verifies `value`. No need to check return value. */
+ sscanf(token, "0x%lx", value);
+}
+
+static int intel_iommu_mapping_get(const char *bdf, u64 iova,
+ struct iommu_mapping *mapping)
+{
+ char iommu_mapping_path[PATH_MAX], line[PATH_MAX];
+ u64 line_iova = -1;
+ int ret = -ENOENT;
+ FILE *file;
+ char *rest;
+
+ snprintf(iommu_mapping_path, sizeof(iommu_mapping_path),
+ "/sys/kernel/debug/iommu/intel/%s/domain_translation_struct",
+ bdf);
+
+ printf("Searching for IOVA 0x%lx in %s\n", iova, iommu_mapping_path);
+
+ file = fopen(iommu_mapping_path, "r");
+ VFIO_ASSERT_NOT_NULL(file, "fopen(%s) failed", iommu_mapping_path);
+
+ while (fgets(line, sizeof(line), file)) {
+ rest = line;
+
+ parse_next_value(&rest, &line_iova);
+ if (line_iova != (iova / getpagesize()))
+ continue;
+
+ /*
+ * Ensure each struct field is initialized in case of empty
+ * page table values.
+ */
+ memset(mapping, 0, sizeof(*mapping));
+ parse_next_value(&rest, &mapping->pgd);
+ parse_next_value(&rest, &mapping->p4d);
+ parse_next_value(&rest, &mapping->pud);
+ parse_next_value(&rest, &mapping->pmd);
+ parse_next_value(&rest, &mapping->pte);
+
+ ret = 0;
+ break;
+ }
+
+ fclose(file);
+
+ if (ret)
+ printf("IOVA not found\n");
+
+ return ret;
+}
+
+static int iommu_mapping_get(const char *bdf, u64 iova,
+ struct iommu_mapping *mapping)
+{
+ if (!access("/sys/kernel/debug/iommu/intel", F_OK))
+ return intel_iommu_mapping_get(bdf, iova, mapping);
+
+ return -EOPNOTSUPP;
+}
+
+FIXTURE(vfio_dma_mapping_test) {
+ struct vfio_pci_device *device;
+};
+
+FIXTURE_VARIANT(vfio_dma_mapping_test) {
+ const char *iommu_mode;
+ u64 size;
+ int mmap_flags;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode, _name, _size, _mmap_flags) \
+FIXTURE_VARIANT_ADD(vfio_dma_mapping_test, _iommu_mode ## _ ## _name) { \
+ .iommu_mode = #_iommu_mode, \
+ .size = (_size), \
+ .mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE | (_mmap_flags), \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0);
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB);
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB);
+
+FIXTURE_SETUP(vfio_dma_mapping_test)
+{
+ self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+}
+
+FIXTURE_TEARDOWN(vfio_dma_mapping_test)
+{
+ vfio_pci_device_cleanup(self->device);
+}
+
+TEST_F(vfio_dma_mapping_test, dma_map_unmap)
+{
+ const u64 size = variant->size ?: getpagesize();
+ const int flags = variant->mmap_flags;
+ struct vfio_dma_region region;
+ struct iommu_mapping mapping;
+ u64 mapping_size = size;
+ int rc;
+
+ region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+
+ /* Skip the test if there aren't enough HugeTLB pages available. */
+ if (flags & MAP_HUGETLB && region.vaddr == MAP_FAILED)
+ SKIP(return, "mmap() failed: %s (%d)\n", strerror(errno), errno);
+ else
+ ASSERT_NE(region.vaddr, MAP_FAILED);
+
+ region.iova = (u64)region.vaddr;
+ region.size = size;
+
+ vfio_pci_dma_map(self->device, &region);
+ printf("Mapped HVA %p (size 0x%lx) at IOVA 0x%lx\n", region.vaddr, size, region.iova);
+
+ ASSERT_EQ(region.iova, to_iova(self->device, region.vaddr));
+
+ rc = iommu_mapping_get(device_bdf, region.iova, &mapping);
+ if (rc == -EOPNOTSUPP)
+ goto unmap;
+
+ /*
+ * IOMMUFD compatibility-mode does not support huge mappings when
+ * using VFIO_TYPE1_IOMMU.
+ */
+ if (!strcmp(variant->iommu_mode, "iommufd_compat_type1"))
+ mapping_size = SZ_4K;
+
+ ASSERT_EQ(0, rc);
+ printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova);
+ printf("PGD: 0x%016lx\n", mapping.pgd);
+ printf("P4D: 0x%016lx\n", mapping.p4d);
+ printf("PUD: 0x%016lx\n", mapping.pud);
+ printf("PMD: 0x%016lx\n", mapping.pmd);
+ printf("PTE: 0x%016lx\n", mapping.pte);
+
+ switch (mapping_size) {
+ case SZ_4K:
+ ASSERT_NE(0, mapping.pte);
+ break;
+ case SZ_2M:
+ ASSERT_EQ(0, mapping.pte);
+ ASSERT_NE(0, mapping.pmd);
+ break;
+ case SZ_1G:
+ ASSERT_EQ(0, mapping.pte);
+ ASSERT_EQ(0, mapping.pmd);
+ ASSERT_NE(0, mapping.pud);
+ break;
+ default:
+ VFIO_FAIL("Unrecognized size: 0x%lx\n", mapping_size);
+ }
+
+unmap:
+ vfio_pci_dma_unmap(self->device, &region);
+ printf("Unmapped IOVA 0x%lx\n", region.iova);
+ ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr));
+ ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping));
+
+ ASSERT_TRUE(!munmap(region.vaddr, size));
+}
+
+int main(int argc, char *argv[])
+{
+ device_bdf = vfio_selftests_get_bdf(&argc, argv);
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
new file mode 100644
index 000000000000..3655106b912d
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/types.h>
+#include <linux/limits.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <vfio_util.h>
+#include "../kselftest_harness.h"
+
+static const char iommu_dev_path[] = "/dev/iommu";
+static const char *cdev_path;
+
+static int vfio_device_bind_iommufd_ioctl(int cdev_fd, int iommufd)
+{
+ struct vfio_device_bind_iommufd bind_args = {
+ .argsz = sizeof(bind_args),
+ .iommufd = iommufd,
+ };
+
+ return ioctl(cdev_fd, VFIO_DEVICE_BIND_IOMMUFD, &bind_args);
+}
+
+static int vfio_device_get_info_ioctl(int cdev_fd)
+{
+ struct vfio_device_info info_args = { .argsz = sizeof(info_args) };
+
+ return ioctl(cdev_fd, VFIO_DEVICE_GET_INFO, &info_args);
+}
+
+static int vfio_device_ioas_alloc_ioctl(int iommufd, struct iommu_ioas_alloc *alloc_args)
+{
+ *alloc_args = (struct iommu_ioas_alloc){
+ .size = sizeof(struct iommu_ioas_alloc),
+ };
+
+ return ioctl(iommufd, IOMMU_IOAS_ALLOC, alloc_args);
+}
+
+static int vfio_device_attach_iommufd_pt_ioctl(int cdev_fd, u32 pt_id)
+{
+ struct vfio_device_attach_iommufd_pt attach_args = {
+ .argsz = sizeof(attach_args),
+ .pt_id = pt_id,
+ };
+
+ return ioctl(cdev_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_args);
+}
+
+static int vfio_device_detach_iommufd_pt_ioctl(int cdev_fd)
+{
+ struct vfio_device_detach_iommufd_pt detach_args = {
+ .argsz = sizeof(detach_args),
+ };
+
+ return ioctl(cdev_fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_args);
+}
+
+FIXTURE(vfio_cdev) {
+ int cdev_fd;
+ int iommufd;
+};
+
+FIXTURE_SETUP(vfio_cdev)
+{
+ ASSERT_LE(0, (self->cdev_fd = open(cdev_path, O_RDWR, 0)));
+ ASSERT_LE(0, (self->iommufd = open(iommu_dev_path, O_RDWR, 0)));
+}
+
+FIXTURE_TEARDOWN(vfio_cdev)
+{
+ ASSERT_EQ(0, close(self->cdev_fd));
+ ASSERT_EQ(0, close(self->iommufd));
+}
+
+TEST_F(vfio_cdev, bind)
+{
+ ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+ ASSERT_EQ(0, vfio_device_get_info_ioctl(self->cdev_fd));
+}
+
+TEST_F(vfio_cdev, get_info_without_bind_fails)
+{
+ ASSERT_NE(0, vfio_device_get_info_ioctl(self->cdev_fd));
+}
+
+TEST_F(vfio_cdev, bind_bad_iommufd_fails)
+{
+ ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, -2));
+}
+
+TEST_F(vfio_cdev, repeated_bind_fails)
+{
+ ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+ ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+}
+
+TEST_F(vfio_cdev, attach_detatch_pt)
+{
+ struct iommu_ioas_alloc alloc_args;
+
+ ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+ ASSERT_EQ(0, vfio_device_ioas_alloc_ioctl(self->iommufd, &alloc_args));
+ ASSERT_EQ(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, alloc_args.out_ioas_id));
+ ASSERT_EQ(0, vfio_device_detach_iommufd_pt_ioctl(self->cdev_fd));
+}
+
+TEST_F(vfio_cdev, attach_invalid_pt_fails)
+{
+ ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd));
+ ASSERT_NE(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, UINT32_MAX));
+}
+
+int main(int argc, char *argv[])
+{
+ const char *device_bdf = vfio_selftests_get_bdf(&argc, argv);
+
+ cdev_path = vfio_pci_get_cdev_path(device_bdf);
+ printf("Using cdev device %s\n", cdev_path);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_pci_device_test.c b/tools/testing/selftests/vfio/vfio_pci_device_test.c
new file mode 100644
index 000000000000..7a270698e4d2
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_pci_device_test.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdlib.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/limits.h>
+#include <linux/pci_regs.h>
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <vfio_util.h>
+
+#include "../kselftest_harness.h"
+
+static const char *device_bdf;
+
+/*
+ * Limit the number of MSIs enabled/disabled by the test regardless of the
+ * number of MSIs the device itself supports, e.g. to avoid hitting IRTE limits.
+ */
+#define MAX_TEST_MSI 16U
+
+FIXTURE(vfio_pci_device_test) {
+ struct vfio_pci_device *device;
+};
+
+FIXTURE_SETUP(vfio_pci_device_test)
+{
+ self->device = vfio_pci_device_init(device_bdf, default_iommu_mode);
+}
+
+FIXTURE_TEARDOWN(vfio_pci_device_test)
+{
+ vfio_pci_device_cleanup(self->device);
+}
+
+#define read_pci_id_from_sysfs(_file) ({ \
+ char __sysfs_path[PATH_MAX]; \
+ char __buf[32]; \
+ int __fd; \
+ \
+ snprintf(__sysfs_path, PATH_MAX, "/sys/bus/pci/devices/%s/%s", device_bdf, _file); \
+ ASSERT_GT((__fd = open(__sysfs_path, O_RDONLY)), 0); \
+ ASSERT_GT(read(__fd, __buf, ARRAY_SIZE(__buf)), 0); \
+ ASSERT_EQ(0, close(__fd)); \
+ (u16)strtoul(__buf, NULL, 0); \
+})
+
+TEST_F(vfio_pci_device_test, config_space_read_write)
+{
+ u16 vendor, device;
+ u16 command;
+
+ /* Check that Vendor and Device match what the kernel reports. */
+ vendor = read_pci_id_from_sysfs("vendor");
+ device = read_pci_id_from_sysfs("device");
+ ASSERT_TRUE(vfio_pci_device_match(self->device, vendor, device));
+
+ printf("Vendor: %04x, Device: %04x\n", vendor, device);
+
+ command = vfio_pci_config_readw(self->device, PCI_COMMAND);
+ ASSERT_FALSE(command & PCI_COMMAND_MASTER);
+
+ vfio_pci_config_writew(self->device, PCI_COMMAND, command | PCI_COMMAND_MASTER);
+ command = vfio_pci_config_readw(self->device, PCI_COMMAND);
+ ASSERT_TRUE(command & PCI_COMMAND_MASTER);
+ printf("Enabled Bus Mastering (command: %04x)\n", command);
+
+ vfio_pci_config_writew(self->device, PCI_COMMAND, command & ~PCI_COMMAND_MASTER);
+ command = vfio_pci_config_readw(self->device, PCI_COMMAND);
+ ASSERT_FALSE(command & PCI_COMMAND_MASTER);
+ printf("Disabled Bus Mastering (command: %04x)\n", command);
+}
+
+TEST_F(vfio_pci_device_test, validate_bars)
+{
+ struct vfio_pci_bar *bar;
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ bar = &self->device->bars[i];
+
+ if (!(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) {
+ printf("BAR %d does not support mmap()\n", i);
+ ASSERT_EQ(NULL, bar->vaddr);
+ continue;
+ }
+
+ /*
+ * BARs that support mmap() should be automatically mapped by
+ * vfio_pci_device_init().
+ */
+ ASSERT_NE(NULL, bar->vaddr);
+ ASSERT_NE(0, bar->info.size);
+ printf("BAR %d mapped at %p (size 0x%llx)\n", i, bar->vaddr, bar->info.size);
+ }
+}
+
+FIXTURE(vfio_pci_irq_test) {
+ struct vfio_pci_device *device;
+};
+
+FIXTURE_VARIANT(vfio_pci_irq_test) {
+ int irq_index;
+};
+
+FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msi) {
+ .irq_index = VFIO_PCI_MSI_IRQ_INDEX,
+};
+
+FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msix) {
+ .irq_index = VFIO_PCI_MSIX_IRQ_INDEX,
+};
+
+FIXTURE_SETUP(vfio_pci_irq_test)
+{
+ self->device = vfio_pci_device_init(device_bdf, default_iommu_mode);
+}
+
+FIXTURE_TEARDOWN(vfio_pci_irq_test)
+{
+ vfio_pci_device_cleanup(self->device);
+}
+
+TEST_F(vfio_pci_irq_test, enable_trigger_disable)
+{
+ bool msix = variant->irq_index == VFIO_PCI_MSIX_IRQ_INDEX;
+ int msi_eventfd;
+ u32 count;
+ u64 value;
+ int i;
+
+ if (msix)
+ count = self->device->msix_info.count;
+ else
+ count = self->device->msi_info.count;
+
+ count = min(count, MAX_TEST_MSI);
+
+ if (!count)
+ SKIP(return, "MSI%s: not supported\n", msix ? "-x" : "");
+
+ vfio_pci_irq_enable(self->device, variant->irq_index, 0, count);
+ printf("MSI%s: enabled %d interrupts\n", msix ? "-x" : "", count);
+
+ for (i = 0; i < count; i++) {
+ msi_eventfd = self->device->msi_eventfds[i];
+
+ fcntl_set_nonblock(msi_eventfd);
+ ASSERT_EQ(-1, read(msi_eventfd, &value, 8));
+ ASSERT_EQ(EAGAIN, errno);
+
+ vfio_pci_irq_trigger(self->device, variant->irq_index, i);
+
+ ASSERT_EQ(8, read(msi_eventfd, &value, 8));
+ ASSERT_EQ(1, value);
+ }
+
+ vfio_pci_irq_disable(self->device, variant->irq_index);
+}
+
+TEST_F(vfio_pci_device_test, reset)
+{
+ if (!(self->device->info.flags & VFIO_DEVICE_FLAGS_RESET))
+ SKIP(return, "Device does not support reset\n");
+
+ vfio_pci_device_reset(self->device);
+}
+
+int main(int argc, char *argv[])
+{
+ device_bdf = vfio_selftests_get_bdf(&argc, argv);
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
new file mode 100644
index 000000000000..2dbd70b7db62
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/sizes.h>
+#include <linux/vfio.h>
+
+#include <vfio_util.h>
+
+#include "../kselftest_harness.h"
+
+static const char *device_bdf;
+
+#define ASSERT_NO_MSI(_eventfd) do { \
+ u64 __value; \
+ \
+ ASSERT_EQ(-1, read(_eventfd, &__value, 8)); \
+ ASSERT_EQ(EAGAIN, errno); \
+} while (0)
+
+static void region_setup(struct vfio_pci_device *device,
+ struct vfio_dma_region *region, u64 size)
+{
+ const int flags = MAP_SHARED | MAP_ANONYMOUS;
+ const int prot = PROT_READ | PROT_WRITE;
+ void *vaddr;
+
+ vaddr = mmap(NULL, size, prot, flags, -1, 0);
+ VFIO_ASSERT_NE(vaddr, MAP_FAILED);
+
+ region->vaddr = vaddr;
+ region->iova = (u64)vaddr;
+ region->size = size;
+
+ vfio_pci_dma_map(device, region);
+}
+
+static void region_teardown(struct vfio_pci_device *device,
+ struct vfio_dma_region *region)
+{
+ vfio_pci_dma_unmap(device, region);
+ VFIO_ASSERT_EQ(munmap(region->vaddr, region->size), 0);
+}
+
+FIXTURE(vfio_pci_driver_test) {
+ struct vfio_pci_device *device;
+ struct vfio_dma_region memcpy_region;
+ void *vaddr;
+ int msi_fd;
+
+ u64 size;
+ void *src;
+ void *dst;
+ iova_t src_iova;
+ iova_t dst_iova;
+ iova_t unmapped_iova;
+};
+
+FIXTURE_VARIANT(vfio_pci_driver_test) {
+ const char *iommu_mode;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \
+FIXTURE_VARIANT_ADD(vfio_pci_driver_test, _iommu_mode) { \
+ .iommu_mode = #_iommu_mode, \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+FIXTURE_SETUP(vfio_pci_driver_test)
+{
+ struct vfio_pci_driver *driver;
+
+ self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+
+ driver = &self->device->driver;
+
+ region_setup(self->device, &self->memcpy_region, SZ_1G);
+ region_setup(self->device, &driver->region, SZ_2M);
+
+ /* Any IOVA that doesn't overlap memcpy_region and driver->region. */
+ self->unmapped_iova = 8UL * SZ_1G;
+
+ vfio_pci_driver_init(self->device);
+ self->msi_fd = self->device->msi_eventfds[driver->msi];
+
+ /*
+ * Use the maximum size supported by the device for memcpy operations,
+ * slimmed down to fit into the memcpy region (divided by 2 so src and
+ * dst regions do not overlap).
+ */
+ self->size = self->device->driver.max_memcpy_size;
+ self->size = min(self->size, self->memcpy_region.size / 2);
+
+ self->src = self->memcpy_region.vaddr;
+ self->dst = self->src + self->size;
+
+ self->src_iova = to_iova(self->device, self->src);
+ self->dst_iova = to_iova(self->device, self->dst);
+}
+
+FIXTURE_TEARDOWN(vfio_pci_driver_test)
+{
+ struct vfio_pci_driver *driver = &self->device->driver;
+
+ vfio_pci_driver_remove(self->device);
+
+ region_teardown(self->device, &self->memcpy_region);
+ region_teardown(self->device, &driver->region);
+
+ vfio_pci_device_cleanup(self->device);
+}
+
+TEST_F(vfio_pci_driver_test, init_remove)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ vfio_pci_driver_remove(self->device);
+ vfio_pci_driver_init(self->device);
+ }
+}
+
+TEST_F(vfio_pci_driver_test, memcpy_success)
+{
+ fcntl_set_nonblock(self->msi_fd);
+
+ memset(self->src, 'x', self->size);
+ memset(self->dst, 'y', self->size);
+
+ ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device,
+ self->src_iova,
+ self->dst_iova,
+ self->size));
+
+ ASSERT_EQ(0, memcmp(self->src, self->dst, self->size));
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+TEST_F(vfio_pci_driver_test, memcpy_from_unmapped_iova)
+{
+ fcntl_set_nonblock(self->msi_fd);
+
+ /*
+ * Ignore the return value since not all devices will detect and report
+ * accesses to unmapped IOVAs as errors.
+ */
+ vfio_pci_driver_memcpy(self->device, self->unmapped_iova,
+ self->dst_iova, self->size);
+
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+TEST_F(vfio_pci_driver_test, memcpy_to_unmapped_iova)
+{
+ fcntl_set_nonblock(self->msi_fd);
+
+ /*
+ * Ignore the return value since not all devices will detect and report
+ * accesses to unmapped IOVAs as errors.
+ */
+ vfio_pci_driver_memcpy(self->device, self->src_iova,
+ self->unmapped_iova, self->size);
+
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+TEST_F(vfio_pci_driver_test, send_msi)
+{
+ u64 value;
+
+ vfio_pci_driver_send_msi(self->device);
+ ASSERT_EQ(8, read(self->msi_fd, &value, 8));
+ ASSERT_EQ(1, value);
+}
+
+TEST_F(vfio_pci_driver_test, mix_and_match)
+{
+ u64 value;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ memset(self->src, 'x', self->size);
+ memset(self->dst, 'y', self->size);
+
+ ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device,
+ self->src_iova,
+ self->dst_iova,
+ self->size));
+
+ ASSERT_EQ(0, memcmp(self->src, self->dst, self->size));
+
+ vfio_pci_driver_memcpy(self->device,
+ self->unmapped_iova,
+ self->dst_iova,
+ self->size);
+
+ vfio_pci_driver_send_msi(self->device);
+ ASSERT_EQ(8, read(self->msi_fd, &value, 8));
+ ASSERT_EQ(1, value);
+ }
+}
+
+TEST_F_TIMEOUT(vfio_pci_driver_test, memcpy_storm, 60)
+{
+ struct vfio_pci_driver *driver = &self->device->driver;
+ u64 total_size;
+ u64 count;
+
+ fcntl_set_nonblock(self->msi_fd);
+
+ /*
+ * Perform up to 250GiB worth of DMA reads and writes across several
+ * memcpy operations. Some devices can support even more but the test
+ * will take too long.
+ */
+ total_size = 250UL * SZ_1G;
+ count = min(total_size / self->size, driver->max_memcpy_count);
+
+ printf("Kicking off %lu memcpys of size 0x%lx\n", count, self->size);
+ vfio_pci_driver_memcpy_start(self->device,
+ self->src_iova,
+ self->dst_iova,
+ self->size, count);
+
+ ASSERT_EQ(0, vfio_pci_driver_memcpy_wait(self->device));
+ ASSERT_NO_MSI(self->msi_fd);
+}
+
+int main(int argc, char *argv[])
+{
+ struct vfio_pci_device *device;
+
+ device_bdf = vfio_selftests_get_bdf(&argc, argv);
+
+ device = vfio_pci_device_init(device_bdf, default_iommu_mode);
+ if (!device->driver.ops) {
+ fprintf(stderr, "No driver found for device %s\n", device_bdf);
+ return KSFT_SKIP;
+ }
+ vfio_pci_device_cleanup(device);
+
+ return test_harness_run(argc, argv);
+}