author	Linus Torvalds <torvalds@linux-foundation.org>	2025-09-19 16:02:48 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-09-19 16:02:48 -0700
commit	497b9a7b8df955fffd612d0d4aaf315b03556b10 (patch)
tree	c75fb2ac788a69d4e43d1865ff69a9ad387d515d
parent	1522b530ac3e2dadd75ccb351b88d3c7c4cf584e (diff)
parent	1e56310b40fd2e7e0b9493da9ff488af145bdd0c (diff)
Merge tag 'iommu-fixes-v6.17-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux
Pull iommu fixes from Joerg Roedel:

 - Fixes for memory leak and memory corruption bugs on S390 and AMD-Vi

 - Race condition fix in AMD-Vi page table code and S390 device attach
   code

 - Intel VT-d: Fix alignment checks in __domain_mapping()

 - AMD-Vi: Fix potentially incorrect DTE settings when device has
   aliases

* tag 'iommu-fixes-v6.17-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux:
  iommu/amd/pgtbl: Fix possible race while increase page table level
  iommu/amd: Fix alias device DTE setting
  iommu/s390: Make attach succeed when the device was surprise removed
  iommu/vt-d: Fix __domain_mapping()'s usage of switch_to_super_page()
  iommu/s390: Fix memory corruption when using identity domain
  iommu/amd: Fix ivrs_base memleak in early_amd_iommu_init()
Diffstat:
 arch/s390/include/asm/pci_insn.h    | 10 +++++-----
 drivers/iommu/amd/amd_iommu_types.h |  1 +
 drivers/iommu/amd/init.c            |  9 +++++----
 drivers/iommu/amd/io_pgtable.c      | 25 ++++++++++++++++++-----
 drivers/iommu/intel/iommu.c         |  7 ++++++-
 drivers/iommu/s390-iommu.c          | 29 ++++++++++++++++++++------
 6 files changed, 59 insertions(+), 22 deletions(-)
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index e5f57cfe1d45..025c6dcbf893 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -16,11 +16,11 @@
#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40
#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44
-/* Load/Store return codes */
-#define ZPCI_PCI_LS_OK 0
-#define ZPCI_PCI_LS_ERR 1
-#define ZPCI_PCI_LS_BUSY 2
-#define ZPCI_PCI_LS_INVAL_HANDLE 3
+/* PCI instruction condition codes */
+#define ZPCI_CC_OK 0
+#define ZPCI_CC_ERR 1
+#define ZPCI_CC_BUSY 2
+#define ZPCI_CC_INVAL_HANDLE 3
/* Load/Store address space identifiers */
#define ZPCI_PCIAS_MEMIO_0 0
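
The rename makes clear that these values are the architecture's PCI instruction condition codes, not load/store-specific return values. A minimal sketch of how a caller might map them to errnos — the helper name and the exact mapping are illustrative, not part of this patch:

static int zpci_example_cc_to_errno(int cc)
{
	switch (cc) {
	case ZPCI_CC_OK:
		return 0;
	case ZPCI_CC_BUSY:		/* a retry may succeed */
		return -EBUSY;
	case ZPCI_CC_INVAL_HANDLE:	/* function handle is stale or gone */
		return -ENODEV;
	case ZPCI_CC_ERR:
	default:
		return -EIO;
	}
}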
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5219d7ddfdaa..95f63c5f6159 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -555,6 +555,7 @@ struct gcr3_tbl_info {
};
struct amd_io_pgtable {
+ seqcount_t seqcount; /* Protects root/mode update */
struct io_pgtable pgtbl;
int mode;
u64 *root;
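
Pairing root and mode behind a seqcount_t matters because the two fields must be observed together: a lockless walker that reads mode from before a level increase but root from after it indexes the new root at the wrong depth. A condensed sketch of the hazard the io_pgtable.c hunks below close (the interleaving is illustrative):

/* Without the seqcount (the race being fixed):
 *
 *   CPU0 (alloc_pte)                  CPU1 (increase_address_space)
 *   level = pgtable->mode - 1;        // sees old mode, e.g. 3-level
 *                                     pgtable->root = pte;  // deeper root
 *                                     pgtable->mode += 1;   // now 4-level
 *   pte = &pgtable->root[             // new root indexed with the old
 *         PM_LEVEL_INDEX(level, address)];  // level: wrong entry
 */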
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 8de689b2c5ed..ba9e582a8bbe 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1455,12 +1455,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
PCI_FUNC(e->devid));
devid = e->devid;
- for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
- if (alias)
+ if (alias) {
+ for (dev_i = devid_start; dev_i <= devid; ++dev_i)
pci_seg->alias_table[dev_i] = devid_to;
+ set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
}
set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags);
- set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
break;
case IVHD_DEV_SPECIAL: {
u8 handle, type;
@@ -3067,7 +3067,8 @@ static int __init early_amd_iommu_init(void)
if (!boot_cpu_has(X86_FEATURE_CX16)) {
pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/*
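
The second hunk matters because the mapped IVRS table (ivrs_base) has already been acquired by the time the CMPXCHG16B check runs, so an early return leaks the table reference; routing through the existing out label releases it. A condensed sketch of the acquire/single-exit shape this restores — names are illustrative and the real function does far more:

static int __init example_early_init(void)
{
	struct acpi_table_header *tbl;
	acpi_status status;
	int ret = 0;

	status = acpi_get_table("IVRS", 0, &tbl);	/* takes a reference */
	if (ACPI_FAILURE(status))
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_CX16)) {
		ret = -EINVAL;
		goto out;	/* must not return and skip the put */
	}

	/* ... remainder of init ... */
out:
	acpi_put_table(tbl);	/* balances acpi_get_table() */
	return ret;
}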
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index a91e71f981ef..70c2f5b1631b 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>
+#include <linux/seqlock.h>
#include <asm/barrier.h>
@@ -130,8 +131,11 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
+ write_seqcount_begin(&pgtable->seqcount);
pgtable->root = pte;
pgtable->mode += 1;
+ write_seqcount_end(&pgtable->seqcount);
+
amd_iommu_update_and_flush_device_table(domain);
pte = NULL;
@@ -153,6 +157,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
{
unsigned long last_addr = address + (page_size - 1);
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
+ unsigned int seqcount;
int level, end_lvl;
u64 *pte, *page;
@@ -170,8 +175,14 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
}
- level = pgtable->mode - 1;
- pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ do {
+ seqcount = read_seqcount_begin(&pgtable->seqcount);
+
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
+
+
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -249,6 +260,7 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
unsigned long *page_size)
{
int level;
+ unsigned int seqcount;
u64 *pte;
*page_size = 0;
@@ -256,8 +268,12 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
if (address > PM_LEVEL_SIZE(pgtable->mode))
return NULL;
- level = pgtable->mode - 1;
- pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ do {
+ seqcount = read_seqcount_begin(&pgtable->seqcount);
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
+
*page_size = PTE_LEVEL_PAGE_SIZE(level);
while (level > 0) {
@@ -541,6 +557,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
if (!pgtable->root)
return NULL;
pgtable->mode = PAGE_MODE_3_LEVEL;
+ seqcount_init(&pgtable->seqcount);
cfg->pgsize_bitmap = amd_iommu_pgsize_bitmap;
cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
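
Taken together, these hunks form the classic seqcount pattern: one writer publishing root and mode as a unit, and lockless readers retrying until they observe a stable pair. A self-contained sketch of the same shape — types and names are illustrative; in the driver the write side is additionally serialized by the domain lock, as write_seqcount_begin() requires of its callers:

#include <linux/seqlock.h>

struct example_pgtable {
	seqcount_t seqcount;	/* protects root/mode as a pair */
	int mode;
	u64 *root;
};

/* Writer: assumes callers already hold a lock serializing growers. */
static void example_grow(struct example_pgtable *p, u64 *new_root)
{
	write_seqcount_begin(&p->seqcount);
	p->root = new_root;
	p->mode += 1;
	write_seqcount_end(&p->seqcount);
}

/* Lockless reader: retries if it raced with example_grow(). */
static u64 *example_first_pte(struct example_pgtable *p, unsigned long addr)
{
	unsigned int seq;
	int level;
	u64 *pte;

	do {
		seq = read_seqcount_begin(&p->seqcount);
		level = p->mode - 1;
		pte = &p->root[PM_LEVEL_INDEX(level, addr)];
	} while (read_seqcount_retry(&p->seqcount, seq));

	return pte;
}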
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9c3ab9d9f69a..dff2d895b8ab 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1575,6 +1575,10 @@ static void switch_to_super_page(struct dmar_domain *domain,
unsigned long lvl_pages = lvl_to_nr_pages(level);
struct dma_pte *pte = NULL;
+ if (WARN_ON(!IS_ALIGNED(start_pfn, lvl_pages) ||
+ !IS_ALIGNED(end_pfn + 1, lvl_pages)))
+ return;
+
while (start_pfn <= end_pfn) {
if (!pte)
pte = pfn_to_dma_pte(domain, start_pfn, &level,
@@ -1650,7 +1654,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long pages_to_remove;
pteval |= DMA_PTE_LARGE_PAGE;
- pages_to_remove = min_t(unsigned long, nr_pages,
+ pages_to_remove = min_t(unsigned long,
+ round_down(nr_pages, lvl_pages),
nr_pte_to_next_page(pte) * lvl_pages);
end_pfn = iov_pfn + pages_to_remove - 1;
switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
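
The round_down() keeps pages_to_remove a whole number of superpages, so end_pfn + 1 always satisfies the new alignment WARN_ON above. A worked example with illustrative numbers (2 MiB superpages, so lvl_pages == 512):

/* Assume nr_pages == 700 and nr_pte_to_next_page(pte) * lvl_pages == 1024:
 *
 *   before: pages_to_remove = min(700, 1024)                  = 700
 *           end_pfn + 1 = iov_pfn + 700 -> not a multiple of 512
 *   after:  pages_to_remove = min(round_down(700, 512), 1024) = 512
 *           end_pfn + 1 = iov_pfn + 512 -> superpage-aligned
 *
 * (iov_pfn itself is already lvl_pages-aligned here, since largepage_lvl
 * was only chosen for an aligned start.)
 */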
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 9c80d61deb2c..aa576736d60b 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -612,6 +612,23 @@ static u64 get_iota_region_flag(struct s390_domain *domain)
}
}
+static bool reg_ioat_propagate_error(int cc, u8 status)
+{
+ /*
+ * If the device is in the error state the reset routine
+ * will register the IOAT of the newly set domain on re-enable
+ */
+ if (cc == ZPCI_CC_ERR && status == ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ return false;
+ /*
+ * If the device was removed treat registration as success
+ * and let the subsequent error event trigger tear down.
+ */
+ if (cc == ZPCI_CC_INVAL_HANDLE)
+ return false;
+ return cc != ZPCI_CC_OK;
+}
+
static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
struct iommu_domain *domain, u8 *status)
{
@@ -696,7 +713,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
/* If we fail now DMA remains blocked via blocking domain */
cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
- if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ if (reg_ioat_propagate_error(cc, status))
return -EIO;
zdev->dma_table = s390_domain->dma_table;
zdev_s390_domain_update(zdev, domain);
@@ -1032,7 +1049,8 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
lockdep_assert_held(&zdev->dom_lock);
- if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
+ if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED ||
+ zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY)
return NULL;
s390_domain = to_s390_domain(zdev->s390_domain);
@@ -1123,12 +1141,7 @@ static int s390_attach_dev_identity(struct iommu_domain *domain,
/* If we fail now DMA remains blocked via blocking domain */
cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
-
- /*
- * If the device is undergoing error recovery the reset code
- * will re-establish the new domain.
- */
- if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ if (reg_ioat_propagate_error(cc, status))
return -EIO;
zdev_s390_domain_update(zdev, domain);
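
For reference, the helper's decision table as it now applies to both attach paths, derived from reg_ioat_propagate_error() above:

/* cc                     status                        propagated?
 * ZPCI_CC_OK             any                           no (success)
 * ZPCI_CC_ERR            ZPCI_PCI_ST_FUNC_NOT_AVAIL    no (reset path re-registers the IOAT)
 * ZPCI_CC_ERR            other                         yes -> -EIO
 * ZPCI_CC_BUSY           any                           yes -> -EIO
 * ZPCI_CC_INVAL_HANDLE   any                           no (surprise removal; the
 *                                                          error event tears down)
 */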