summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVasant Hegde <vasant.hegde@amd.com>2025-09-13 06:26:57 +0000
committerJoerg Roedel <joerg.roedel@amd.com>2025-09-19 09:39:40 +0200
commit1e56310b40fd2e7e0b9493da9ff488af145bdd0c (patch)
tree931e288d1c28399e26617d722a52c637562677ab
parenta0c17ed907ac3326cf3c9d6007ea222a746f5cc2 (diff)
iommu/amd/pgtbl: Fix possible race while increase page table level
The AMD IOMMU host page table implementation supports dynamic page table levels (up to 6 levels), starting with a 3-level configuration that expands based on IOVA address. The kernel maintains a root pointer and current page table level to enable proper page table walks in alloc_pte()/fetch_pte() operations. The IOMMU IOVA allocator initially starts with 32-bit address and onces its exhuasted it switches to 64-bit address (max address is determined based on IOMMU and device DMA capability). To support larger IOVA, AMD IOMMU driver increases page table level. But in unmap path (iommu_v1_unmap_pages()), fetch_pte() reads pgtable->[root/mode] without lock. So its possible that in exteme corner case, when increase_address_space() is updating pgtable->[root/mode], fetch_pte() reads wrong page table level (pgtable->mode). It does compare the value with level encoded in page table and returns NULL. This will result is iommu_unmap ops to fail and upper layer may retry/log WARN_ON. CPU 0 CPU 1 ------ ------ map pages unmap pages alloc_pte() -> increase_address_space() iommu_v1_unmap_pages() -> fetch_pte() pgtable->root = pte (new root value) READ pgtable->[mode/root] Reads new root, old mode Updates mode (pgtable->mode += 1) Since Page table level updates are infrequent and already synchronized with a spinlock, implement seqcount to enable lock-free read operations on the read path. Fixes: 754265bcab7 ("iommu/amd: Fix race in increase_address_space()") Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com> Cc: stable@vger.kernel.org Cc: Joao Martins <joao.m.martins@oracle.com> Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> Signed-off-by: Vasant Hegde <vasant.hegde@amd.com> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h1
-rw-r--r--drivers/iommu/amd/io_pgtable.c25
2 files changed, 22 insertions, 4 deletions
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5219d7ddfdaa..95f63c5f6159 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -555,6 +555,7 @@ struct gcr3_tbl_info {
};
struct amd_io_pgtable {
+ seqcount_t seqcount; /* Protects root/mode update */
struct io_pgtable pgtbl;
int mode;
u64 *root;
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index a91e71f981ef..70c2f5b1631b 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>
+#include <linux/seqlock.h>
#include <asm/barrier.h>
@@ -130,8 +131,11 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
+ write_seqcount_begin(&pgtable->seqcount);
pgtable->root = pte;
pgtable->mode += 1;
+ write_seqcount_end(&pgtable->seqcount);
+
amd_iommu_update_and_flush_device_table(domain);
pte = NULL;
@@ -153,6 +157,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
{
unsigned long last_addr = address + (page_size - 1);
struct io_pgtable_cfg *cfg = &pgtable->pgtbl.cfg;
+ unsigned int seqcount;
int level, end_lvl;
u64 *pte, *page;
@@ -170,8 +175,14 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
}
- level = pgtable->mode - 1;
- pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ do {
+ seqcount = read_seqcount_begin(&pgtable->seqcount);
+
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
+
+
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -249,6 +260,7 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
unsigned long *page_size)
{
int level;
+ unsigned int seqcount;
u64 *pte;
*page_size = 0;
@@ -256,8 +268,12 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
if (address > PM_LEVEL_SIZE(pgtable->mode))
return NULL;
- level = pgtable->mode - 1;
- pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ do {
+ seqcount = read_seqcount_begin(&pgtable->seqcount);
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ } while (read_seqcount_retry(&pgtable->seqcount, seqcount));
+
*page_size = PTE_LEVEL_PAGE_SIZE(level);
while (level > 0) {
@@ -541,6 +557,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
if (!pgtable->root)
return NULL;
pgtable->mode = PAGE_MODE_3_LEVEL;
+ seqcount_init(&pgtable->seqcount);
cfg->pgsize_bitmap = amd_iommu_pgsize_bitmap;
cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;