diff options
author | Alex Williamson <alex.williamson@redhat.com> | 2025-05-20 08:31:44 -0600 |
---|---|---|
committer | Alex Williamson <alex.williamson@redhat.com> | 2025-05-20 08:31:44 -0600 |
commit | 6ef04555b252e60913d3dc80b45f048bfef33d0f (patch) | |
tree | 33188b728beef189402488a1c5c48f16d97aaec3 | |
parent | 1dcf2cf102d72e0bbf0d7533756c9a0ccb6e91d3 (diff) | |
parent | 3ee7d9496342246f4353716f6bbf64c945ff6e2d (diff) |
Merge branch 'dma-mapping-for-6.16-two-step-api' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux into v6.16/vfio/next
Merge two step DMA mapping API as basis for mlx5-vfio-pci uses.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
-rw-r--r-- | Documentation/core-api/dma-api.rst | 71 | ||||
-rw-r--r-- | drivers/iommu/dma-iommu.c | 482 | ||||
-rw-r--r-- | drivers/iommu/iommu.c | 84 | ||||
-rw-r--r-- | drivers/pci/p2pdma.c | 38 | ||||
-rw-r--r-- | include/linux/dma-map-ops.h | 54 | ||||
-rw-r--r-- | include/linux/dma-mapping.h | 85 | ||||
-rw-r--r-- | include/linux/iommu.h | 4 | ||||
-rw-r--r-- | include/linux/pci-p2pdma.h | 85 | ||||
-rw-r--r-- | kernel/dma/direct.c | 44 | ||||
-rw-r--r-- | kernel/dma/mapping.c | 18 |
10 files changed, 764 insertions, 201 deletions
diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 8e3cce3d0a23..2ad08517e626 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -530,6 +530,77 @@ routines, e.g.::: .... } +Part Ie - IOVA-based DMA mappings +--------------------------------- + +These APIs allow a very efficient mapping when using an IOMMU. They are an +optional path that requires extra code and are only recommended for drivers +where DMA mapping performance, or the space usage for storing the DMA addresses +matter. All the considerations from the previous section apply here as well. + +:: + + bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t size); + +Is used to try to allocate IOVA space for mapping operation. If it returns +false this API can't be used for the given device and the normal streaming +DMA mapping API should be used. The ``struct dma_iova_state`` is allocated +by the driver and must be kept around until unmap time. + +:: + + static inline bool dma_use_iova(struct dma_iova_state *state) + +Can be used by the driver to check if the IOVA-based API is used after a +call to dma_iova_try_alloc. This can be useful in the unmap path. + +:: + + int dma_iova_link(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + +Is used to link ranges to the IOVA previously allocated. The start of all +but the first call to dma_iova_link for a given state must be aligned +to the DMA merge boundary returned by ``dma_get_merge_boundary())``, and +the size of all but the last range must be aligned to the DMA merge boundary +as well. + +:: + + int dma_iova_sync(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size); + +Must be called to sync the IOMMU page tables for IOVA-range mapped by one or +more calls to ``dma_iova_link()``. + +For drivers that use a one-shot mapping, all ranges can be unmapped and the +IOVA freed by calling: + +:: + + void dma_iova_destroy(struct device *dev, struct dma_iova_state *state, + size_t mapped_len, enum dma_data_direction dir, + unsigned long attrs); + +Alternatively drivers can dynamically manage the IOVA space by unmapping +and mapping individual regions. In that case + +:: + + void dma_iova_unlink(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); + +is used to unmap a range previously mapped, and + +:: + + void dma_iova_free(struct device *dev, struct dma_iova_state *state); + +is used to free the IOVA space. All regions must have been unmapped using +``dma_iova_unlink()`` before calling ``dma_iova_free()``. Part II - Non-coherent DMA allocations -------------------------------------- diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a775e4dbe06f..98f7205ec8fb 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -27,6 +27,7 @@ #include <linux/msi.h> #include <linux/of_iommu.h> #include <linux/pci.h> +#include <linux/pci-p2pdma.h> #include <linux/scatterlist.h> #include <linux/spinlock.h> #include <linux/swiotlb.h> @@ -1137,6 +1138,54 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); } +static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iova_domain *iovad = &domain->iova_cookie->iovad; + + if (!is_swiotlb_active(dev)) { + dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + trace_swiotlb_bounced(dev, phys, size); + + phys = swiotlb_tbl_map_single(dev, phys, size, iova_mask(iovad), dir, + attrs); + + /* + * Untrusted devices should not see padding areas with random leftover + * kernel data, so zero the pre- and post-padding. + * swiotlb_tbl_map_single() has initialized the bounce buffer proper to + * the contents of the original memory buffer. + */ + if (phys != (phys_addr_t)DMA_MAPPING_ERROR && dev_is_untrusted(dev)) { + size_t start, virt = (size_t)phys_to_virt(phys); + + /* Pre-padding */ + start = iova_align_down(iovad, virt); + memset((void *)start, 0, virt - start); + + /* Post-padding */ + start = virt + size; + memset((void *)start, 0, iova_align(iovad, start) - start); + } + + return phys; +} + +/* + * Checks if a physical buffer has unaligned boundaries with respect to + * the IOMMU granule. Returns non-zero if either the start or end + * address is not aligned to the granule boundary. + */ +static inline size_t iova_unaligned(struct iova_domain *iovad, phys_addr_t phys, + size_t size) +{ + return iova_offset(iovad, phys | size); +} + dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -1150,42 +1199,14 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, dma_addr_t iova, dma_mask = dma_get_mask(dev); /* - * If both the physical buffer start address and size are - * page aligned, we don't need to use a bounce page. + * If both the physical buffer start address and size are page aligned, + * we don't need to use a bounce page. */ if (dev_use_swiotlb(dev, size, dir) && - iova_offset(iovad, phys | size)) { - if (!is_swiotlb_active(dev)) { - dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); + iova_unaligned(iovad, phys, size)) { + phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs); + if (phys == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; - } - - trace_swiotlb_bounced(dev, phys, size); - - phys = swiotlb_tbl_map_single(dev, phys, size, - iova_mask(iovad), dir, attrs); - - if (phys == DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; - - /* - * Untrusted devices should not see padding areas with random - * leftover kernel data, so zero the pre- and post-padding. - * swiotlb_tbl_map_single() has initialized the bounce buffer - * proper to the contents of the original memory buffer. - */ - if (dev_is_untrusted(dev)) { - size_t start, virt = (size_t)phys_to_virt(phys); - - /* Pre-padding */ - start = iova_align_down(iovad, virt); - memset((void *)start, 0, virt - start); - - /* Post-padding */ - start = virt + size; - memset((void *)start, 0, - iova_align(iovad, start) - start); - } } if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) @@ -1359,7 +1380,6 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, struct scatterlist *s, *prev = NULL; int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); struct pci_p2pdma_map_state p2pdma_state = {}; - enum pci_p2pdma_map_type map; dma_addr_t iova; size_t iova_len = 0; unsigned long mask = dma_get_seg_boundary(dev); @@ -1389,28 +1409,30 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, size_t s_length = s->length; size_t pad_len = (mask - iova_len + 1) & mask; - if (is_pci_p2pdma_page(sg_page(s))) { - map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); - switch (map) { - case PCI_P2PDMA_MAP_BUS_ADDR: - /* - * iommu_map_sg() will skip this segment as - * it is marked as a bus address, - * __finalise_sg() will copy the dma address - * into the output segment. - */ - continue; - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - /* - * Mapping through host bridge should be - * mapped with regular IOVAs, thus we - * do nothing here and continue below. - */ - break; - default: - ret = -EREMOTEIO; - goto out_restore_sg; - } + switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(s))) { + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Mapping through host bridge should be mapped with + * regular IOVAs, thus we do nothing here and continue + * below. + */ + break; + case PCI_P2PDMA_MAP_NONE: + break; + case PCI_P2PDMA_MAP_BUS_ADDR: + /* + * iommu_map_sg() will skip this segment as it is marked + * as a bus address, __finalise_sg() will copy the dma + * address into the output segment. + */ + s->dma_address = pci_p2pdma_bus_addr_map(&p2pdma_state, + sg_phys(s)); + sg_dma_len(s) = sg->length; + sg_dma_mark_bus_address(s); + continue; + default: + ret = -EREMOTEIO; + goto out_restore_sg; } sg_dma_address(s) = s_iova_off; @@ -1721,6 +1743,354 @@ size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } +/** + * dma_iova_try_alloc - Try to allocate an IOVA space + * @dev: Device to allocate the IOVA space for + * @state: IOVA state + * @phys: physical address + * @size: IOVA size + * + * Check if @dev supports the IOVA-based DMA API, and if yes allocate IOVA space + * for the given base address and size. + * + * Note: @phys is only used to calculate the IOVA alignment. Callers that always + * do PAGE_SIZE aligned transfers can safely pass 0 here. + * + * Returns %true if the IOVA-based DMA API can be used and IOVA space has been + * allocated, or %false if the regular DMA API should be used. + */ +bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t size) +{ + struct iommu_dma_cookie *cookie; + struct iommu_domain *domain; + struct iova_domain *iovad; + size_t iova_off; + dma_addr_t addr; + + memset(state, 0, sizeof(*state)); + if (!use_dma_iommu(dev)) + return false; + + domain = iommu_get_dma_domain(dev); + cookie = domain->iova_cookie; + iovad = &cookie->iovad; + iova_off = iova_offset(iovad, phys); + + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_deferred_attach(dev, iommu_get_domain_for_dev(dev))) + return false; + + if (WARN_ON_ONCE(!size)) + return false; + + /* + * DMA_IOVA_USE_SWIOTLB is flag which is set by dma-iommu + * internals, make sure that caller didn't set it and/or + * didn't use this interface to map SIZE_MAX. + */ + if (WARN_ON_ONCE((u64)size & DMA_IOVA_USE_SWIOTLB)) + return false; + + addr = iommu_dma_alloc_iova(domain, + iova_align(iovad, size + iova_off), + dma_get_mask(dev), dev); + if (!addr) + return false; + + state->addr = addr + iova_off; + state->__size = size; + return true; +} +EXPORT_SYMBOL_GPL(dma_iova_try_alloc); + +/** + * dma_iova_free - Free an IOVA space + * @dev: Device to free the IOVA space for + * @state: IOVA state + * + * Undoes a successful dma_try_iova_alloc(). + * + * Note that all dma_iova_link() calls need to be undone first. For callers + * that never call dma_iova_unlink(), dma_iova_destroy() can be used instead + * which unlinks all ranges and frees the IOVA space in a single efficient + * operation. + */ +void dma_iova_free(struct device *dev, struct dma_iova_state *state) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, state->addr); + size_t size = dma_iova_size(state); + + iommu_dma_free_iova(domain, state->addr - iova_start_pad, + iova_align(iovad, size + iova_start_pad), NULL); +} +EXPORT_SYMBOL_GPL(dma_iova_free); + +static int __dma_iova_link(struct device *dev, dma_addr_t addr, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + bool coherent = dev_is_dma_coherent(dev); + + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + + return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size, + dma_info_to_prot(dir, coherent, attrs), GFP_ATOMIC); +} + +static int iommu_dma_iova_bounce_and_link(struct device *dev, dma_addr_t addr, + phys_addr_t phys, size_t bounce_len, + enum dma_data_direction dir, unsigned long attrs, + size_t iova_start_pad) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iova_domain *iovad = &domain->iova_cookie->iovad; + phys_addr_t bounce_phys; + int error; + + bounce_phys = iommu_dma_map_swiotlb(dev, phys, bounce_len, dir, attrs); + if (bounce_phys == DMA_MAPPING_ERROR) + return -ENOMEM; + + error = __dma_iova_link(dev, addr - iova_start_pad, + bounce_phys - iova_start_pad, + iova_align(iovad, bounce_len), dir, attrs); + if (error) + swiotlb_tbl_unmap_single(dev, bounce_phys, bounce_len, dir, + attrs); + return error; +} + +static int iommu_dma_iova_link_swiotlb(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t offset, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, phys); + size_t iova_end_pad = iova_offset(iovad, phys + size); + dma_addr_t addr = state->addr + offset; + size_t mapped = 0; + int error; + + if (iova_start_pad) { + size_t bounce_len = min(size, iovad->granule - iova_start_pad); + + error = iommu_dma_iova_bounce_and_link(dev, addr, phys, + bounce_len, dir, attrs, iova_start_pad); + if (error) + return error; + state->__size |= DMA_IOVA_USE_SWIOTLB; + + mapped += bounce_len; + size -= bounce_len; + if (!size) + return 0; + } + + size -= iova_end_pad; + error = __dma_iova_link(dev, addr + mapped, phys + mapped, size, dir, + attrs); + if (error) + goto out_unmap; + mapped += size; + + if (iova_end_pad) { + error = iommu_dma_iova_bounce_and_link(dev, addr + mapped, + phys + mapped, iova_end_pad, dir, attrs, 0); + if (error) + goto out_unmap; + state->__size |= DMA_IOVA_USE_SWIOTLB; + } + + return 0; + +out_unmap: + dma_iova_unlink(dev, state, 0, mapped, dir, attrs); + return error; +} + +/** + * dma_iova_link - Link a range of IOVA space + * @dev: DMA device + * @state: IOVA state + * @phys: physical address to link + * @offset: offset into the IOVA state to map into + * @size: size of the buffer + * @dir: DMA direction + * @attrs: attributes of mapping properties + * + * Link a range of IOVA space for the given IOVA state without IOTLB sync. + * This function is used to link multiple physical addresses in contiguous + * IOVA space without performing costly IOTLB sync. + * + * The caller is responsible to call to dma_iova_sync() to sync IOTLB at + * the end of linkage. + */ +int dma_iova_link(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, phys); + + if (WARN_ON_ONCE(iova_start_pad && offset > 0)) + return -EIO; + + if (dev_use_swiotlb(dev, size, dir) && + iova_unaligned(iovad, phys, size)) + return iommu_dma_iova_link_swiotlb(dev, state, phys, offset, + size, dir, attrs); + + return __dma_iova_link(dev, state->addr + offset - iova_start_pad, + phys - iova_start_pad, + iova_align(iovad, size + iova_start_pad), dir, attrs); +} +EXPORT_SYMBOL_GPL(dma_iova_link); + +/** + * dma_iova_sync - Sync IOTLB + * @dev: DMA device + * @state: IOVA state + * @offset: offset into the IOVA state to sync + * @size: size of the buffer + * + * Sync IOTLB for the given IOVA state. This function should be called on + * the IOVA-contiguous range created by one ore more dma_iova_link() calls + * to sync the IOTLB. + */ +int dma_iova_sync(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + dma_addr_t addr = state->addr + offset; + size_t iova_start_pad = iova_offset(iovad, addr); + + return iommu_sync_map(domain, addr - iova_start_pad, + iova_align(iovad, size + iova_start_pad)); +} +EXPORT_SYMBOL_GPL(dma_iova_sync); + +static void iommu_dma_iova_unlink_range_slow(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, addr); + dma_addr_t end = addr + size; + + do { + phys_addr_t phys; + size_t len; + + phys = iommu_iova_to_phys(domain, addr); + if (WARN_ON(!phys)) + /* Something very horrible happen here */ + return; + + len = min_t(size_t, + end - addr, iovad->granule - iova_start_pad); + + if (!dev_is_dma_coherent(dev) && + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_cpu(phys, len, dir); + + swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs); + + addr += len; + iova_start_pad = 0; + } while (addr < end); +} + +static void __iommu_dma_iova_unlink(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs, + bool free_iova) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + dma_addr_t addr = state->addr + offset; + size_t iova_start_pad = iova_offset(iovad, addr); + struct iommu_iotlb_gather iotlb_gather; + size_t unmapped; + + if ((state->__size & DMA_IOVA_USE_SWIOTLB) || + (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))) + iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs); + + iommu_iotlb_gather_init(&iotlb_gather); + iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain); + + size = iova_align(iovad, size + iova_start_pad); + addr -= iova_start_pad; + unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather); + WARN_ON(unmapped != size); + + if (!iotlb_gather.queued) + iommu_iotlb_sync(domain, &iotlb_gather); + if (free_iova) + iommu_dma_free_iova(domain, addr, size, &iotlb_gather); +} + +/** + * dma_iova_unlink - Unlink a range of IOVA space + * @dev: DMA device + * @state: IOVA state + * @offset: offset into the IOVA state to unlink + * @size: size of the buffer + * @dir: DMA direction + * @attrs: attributes of mapping properties + * + * Unlink a range of IOVA space for the given IOVA state. + */ +void dma_iova_unlink(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + __iommu_dma_iova_unlink(dev, state, offset, size, dir, attrs, false); +} +EXPORT_SYMBOL_GPL(dma_iova_unlink); + +/** + * dma_iova_destroy - Finish a DMA mapping transaction + * @dev: DMA device + * @state: IOVA state + * @mapped_len: number of bytes to unmap + * @dir: DMA direction + * @attrs: attributes of mapping properties + * + * Unlink the IOVA range up to @mapped_len and free the entire IOVA space. The + * range of IOVA from dma_addr to @mapped_len must all be linked, and be the + * only linked IOVA in state. + */ +void dma_iova_destroy(struct device *dev, struct dma_iova_state *state, + size_t mapped_len, enum dma_data_direction dir, + unsigned long attrs) +{ + if (mapped_len) + __iommu_dma_iova_unlink(dev, state, 0, mapped_len, dir, attrs, + true); + else + /* + * We can be here if first call to dma_iova_link() failed and + * there is nothing to unlink, so let's be more clear. + */ + dma_iova_free(dev, state); +} +EXPORT_SYMBOL_GPL(dma_iova_destroy); + void iommu_setup_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4f91a740c15f..8619c355ef9c 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2443,8 +2443,8 @@ out_set_count: return pgsize; } -static int __iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; @@ -2453,12 +2453,19 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t orig_paddr = paddr; int ret = 0; + might_sleep_if(gfpflags_allow_blocking(gfp)); + if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return -EINVAL; if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) return -ENODEV; + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + /* find out the minimum page size supported */ min_pagesz = 1 << __ffs(domain->pgsize_bitmap); @@ -2506,31 +2513,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { const struct iommu_domain_ops *ops = domain->ops; - int ret; - might_sleep_if(gfpflags_allow_blocking(gfp)); - - /* Discourage passing strange GFP flags */ - if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | - __GFP_HIGHMEM))) - return -EINVAL; + if (!ops->iotlb_sync_map) + return 0; + return ops->iotlb_sync_map(domain, iova, size); +} - ret = __iommu_map(domain, iova, paddr, size, prot, gfp); - if (ret == 0 && ops->iotlb_sync_map) { - ret = ops->iotlb_sync_map(domain, iova, size); - if (ret) - goto out_err; - } +int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + int ret; - return ret; + ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp); + if (ret) + return ret; -out_err: - /* undo mappings already done */ - iommu_unmap(domain, iova, size); + ret = iommu_sync_map(domain, iova, size); + if (ret) + iommu_unmap(domain, iova, size); return ret; } @@ -2618,6 +2621,25 @@ size_t iommu_unmap(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_unmap); +/** + * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync + * @domain: Domain to manipulate + * @iova: IO virtual address to start + * @size: Length of the range starting from @iova + * @iotlb_gather: range information for a pending IOTLB flush + * + * iommu_unmap_fast() will remove a translation created by iommu_map(). + * It can't subdivide a mapping created by iommu_map(), so it should be + * called with IOVA ranges that match what was passed to iommu_map(). The + * range can aggregate contiguous iommu_map() calls so long as no individual + * range is split. + * + * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers + * which manage the IOTLB flushing externally to perform a batched sync. + * + * Returns: Number of bytes of IOVA unmapped. iova + res will be the point + * unmapping stopped. + */ size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) @@ -2630,26 +2652,17 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { - const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; int ret; - might_sleep_if(gfpflags_allow_blocking(gfp)); - - /* Discourage passing strange GFP flags */ - if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | - __GFP_HIGHMEM))) - return -EINVAL; - while (i <= nents) { phys_addr_t s_phys = sg_phys(sg); if (len && s_phys != start + len) { - ret = __iommu_map(domain, iova + mapped, start, + ret = iommu_map_nosync(domain, iova + mapped, start, len, prot, gfp); - if (ret) goto out_err; @@ -2672,11 +2685,10 @@ next: sg = sg_next(sg); } - if (ops->iotlb_sync_map) { - ret = ops->iotlb_sync_map(domain, iova, mapped); - if (ret) - goto out_err; - } + ret = iommu_sync_map(domain, iova, mapped); + if (ret) + goto out_err; + return mapped; out_err: diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 19214ec81fbb..8d955c25aed3 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -1004,40 +1004,12 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, return type; } -/** - * pci_p2pdma_map_segment - map an sg segment determining the mapping type - * @state: State structure that should be declared outside of the for_each_sg() - * loop and initialized to zero. - * @dev: DMA device that's doing the mapping operation - * @sg: scatterlist segment to map - * - * This is a helper to be used by non-IOMMU dma_map_sg() implementations where - * the sg segment is the same for the page_link and the dma_address. - * - * Attempt to map a single segment in an SGL with the PCI bus address. - * The segment must point to a PCI P2PDMA page and thus must be - * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check. - * - * Returns the type of mapping used and maps the page if the type is - * PCI_P2PDMA_MAP_BUS_ADDR. - */ -enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg) +void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, + struct device *dev, struct page *page) { - if (state->pgmap != page_pgmap(sg_page(sg))) { - state->pgmap = page_pgmap(sg_page(sg)); - state->map = pci_p2pdma_map_type(state->pgmap, dev); - state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset; - } - - if (state->map == PCI_P2PDMA_MAP_BUS_ADDR) { - sg->dma_address = sg_phys(sg) + state->bus_off; - sg_dma_len(sg) = sg->length; - sg_dma_mark_bus_address(sg); - } - - return state->map; + state->pgmap = page_pgmap(page); + state->map = pci_p2pdma_map_type(state->pgmap, dev); + state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset; } /** diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index e172522cd936..f48e5fb88bd5 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -434,58 +434,4 @@ static inline void debug_dma_dump_mappings(struct device *dev) #endif /* CONFIG_DMA_API_DEBUG */ extern const struct dma_map_ops dma_dummy_ops; - -enum pci_p2pdma_map_type { - /* - * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping - * type hasn't been calculated yet. Functions that return this enum - * never return this value. - */ - PCI_P2PDMA_MAP_UNKNOWN = 0, - - /* - * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will - * traverse the host bridge and the host bridge is not in the - * allowlist. DMA Mapping routines should return an error when - * this is returned. - */ - PCI_P2PDMA_MAP_NOT_SUPPORTED, - - /* - * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to - * each other directly through a PCI switch and the transaction will - * not traverse the host bridge. Such a mapping should program - * the DMA engine with PCI bus addresses. - */ - PCI_P2PDMA_MAP_BUS_ADDR, - - /* - * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk - * to each other, but the transaction traverses a host bridge on the - * allowlist. In this case, a normal mapping either with CPU physical - * addresses (in the case of dma-direct) or IOVA addresses (in the - * case of IOMMUs) should be used to program the DMA engine. - */ - PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, -}; - -struct pci_p2pdma_map_state { - struct dev_pagemap *pgmap; - int map; - u64 bus_off; -}; - -#ifdef CONFIG_PCI_P2PDMA -enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg); -#else /* CONFIG_PCI_P2PDMA */ -static inline enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg) -{ - return PCI_P2PDMA_MAP_NOT_SUPPORTED; -} -#endif /* CONFIG_PCI_P2PDMA */ - #endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 85ab710ec0e7..55c03e5fe8cb 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -72,6 +72,22 @@ #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +struct dma_iova_state { + dma_addr_t addr; + u64 __size; +}; + +/* + * Use the high bit to mark if we used swiotlb for one or more ranges. + */ +#define DMA_IOVA_USE_SWIOTLB (1ULL << 63) + +static inline size_t dma_iova_size(struct dma_iova_state *state) +{ + /* Casting is needed for 32-bits systems */ + return (size_t)(state->__size & ~DMA_IOVA_USE_SWIOTLB); +} + #ifdef CONFIG_DMA_API_DEBUG void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); void debug_dma_map_single(struct device *dev, const void *addr, @@ -277,6 +293,70 @@ static inline int dma_mmap_noncontiguous(struct device *dev, } #endif /* CONFIG_HAS_DMA */ +#ifdef CONFIG_IOMMU_DMA +/** + * dma_use_iova - check if the IOVA API is used for this state + * @state: IOVA state + * + * Return %true if the DMA transfers uses the dma_iova_*() calls or %false if + * they can't be used. + */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return state->__size != 0; +} + +bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t size); +void dma_iova_free(struct device *dev, struct dma_iova_state *state); +void dma_iova_destroy(struct device *dev, struct dma_iova_state *state, + size_t mapped_len, enum dma_data_direction dir, + unsigned long attrs); +int dma_iova_sync(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size); +int dma_iova_link(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_iova_unlink(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +#else /* CONFIG_IOMMU_DMA */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return false; +} +static inline bool dma_iova_try_alloc(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t size) +{ + return false; +} +static inline void dma_iova_free(struct device *dev, + struct dma_iova_state *state) +{ +} +static inline void dma_iova_destroy(struct device *dev, + struct dma_iova_state *state, size_t mapped_len, + enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline int dma_iova_sync(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size) +{ + return -EOPNOTSUPP; +} +static inline int dma_iova_link(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t offset, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return -EOPNOTSUPP; +} +static inline void dma_iova_unlink(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ +} +#endif /* CONFIG_IOMMU_DMA */ + #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); @@ -326,6 +406,7 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false; } +bool dma_need_unmap(struct device *dev); #else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ static inline bool dma_dev_need_sync(const struct device *dev) { @@ -351,6 +432,10 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; } +static inline bool dma_need_unmap(struct device *dev) +{ + return false; +} #endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ struct page *dma_alloc_pages(struct device *dev, size_t size, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3a8d35d41fda..15cdadace993 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -872,6 +872,10 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 2c07aa6b7665..075c20b161d9 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -104,4 +104,89 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client) return pci_p2pmem_find_many(&client, 1); } +enum pci_p2pdma_map_type { + /* + * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before + * the mapping type has been calculated. Exported routines for the API + * will never return this value. + */ + PCI_P2PDMA_MAP_UNKNOWN = 0, + + /* + * Not a PCI P2PDMA transfer. + */ + PCI_P2PDMA_MAP_NONE, + + /* + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will + * traverse the host bridge and the host bridge is not in the + * allowlist. DMA Mapping routines should return an error when + * this is returned. + */ + PCI_P2PDMA_MAP_NOT_SUPPORTED, + + /* + * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to + * each other directly through a PCI switch and the transaction will + * not traverse the host bridge. Such a mapping should program + * the DMA engine with PCI bus addresses. + */ + PCI_P2PDMA_MAP_BUS_ADDR, + + /* + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk + * to each other, but the transaction traverses a host bridge on the + * allowlist. In this case, a normal mapping either with CPU physical + * addresses (in the case of dma-direct) or IOVA addresses (in the + * case of IOMMUs) should be used to program the DMA engine. + */ + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; + +struct pci_p2pdma_map_state { + struct dev_pagemap *pgmap; + enum pci_p2pdma_map_type map; + u64 bus_off; +}; + +/* helper for pci_p2pdma_state(), do not use directly */ +void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, + struct device *dev, struct page *page); + +/** + * pci_p2pdma_state - check the P2P transfer state of a page + * @state: P2P state structure + * @dev: device to transfer to/from + * @page: page to map + * + * Check if @page is a PCI P2PDMA page, and if yes of what kind. Returns the + * map type, and updates @state with all information needed for a P2P transfer. + */ +static inline enum pci_p2pdma_map_type +pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, + struct page *page) +{ + if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { + if (state->pgmap != page_pgmap(page)) + __pci_p2pdma_update_state(state, dev, page); + return state->map; + } + return PCI_P2PDMA_MAP_NONE; +} + +/** + * pci_p2pdma_bus_addr_map - Translate a physical address to a bus address + * for a PCI_P2PDMA_MAP_BUS_ADDR transfer. + * @state: P2P state structure + * @paddr: physical address to map + * + * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer. + */ +static inline dma_addr_t +pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) +{ + WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); + return paddr + state->bus_off; +} + #endif /* _LINUX_PCI_P2P_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index b8fe0b3d0ffb..24c359d9c879 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -13,6 +13,7 @@ #include <linux/vmalloc.h> #include <linux/set_memory.h> #include <linux/slab.h> +#include <linux/pci-p2pdma.h> #include "direct.h" /* @@ -462,34 +463,33 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct pci_p2pdma_map_state p2pdma_state = {}; - enum pci_p2pdma_map_type map; struct scatterlist *sg; int i, ret; for_each_sg(sgl, sg, nents, i) { - if (is_pci_p2pdma_page(sg_page(sg))) { - map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg); - switch (map) { - case PCI_P2PDMA_MAP_BUS_ADDR: - continue; - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - /* - * Any P2P mapping that traverses the PCI - * host bridge must be mapped with CPU physical - * address and not PCI bus addresses. This is - * done with dma_direct_map_page() below. - */ - break; - default: - ret = -EREMOTEIO; + switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) { + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Any P2P mapping that traverses the PCI host bridge + * must be mapped with CPU physical address and not PCI + * bus addresses. + */ + break; + case PCI_P2PDMA_MAP_NONE: + sg->dma_address = dma_direct_map_page(dev, sg_page(sg), + sg->offset, sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) { + ret = -EIO; goto out_unmap; } - } - - sg->dma_address = dma_direct_map_page(dev, sg_page(sg), - sg->offset, sg->length, dir, attrs); - if (sg->dma_address == DMA_MAPPING_ERROR) { - ret = -EIO; + break; + case PCI_P2PDMA_MAP_BUS_ADDR: + sg->dma_address = pci_p2pdma_bus_addr_map(&p2pdma_state, + sg_phys(sg)); + sg_dma_mark_bus_address(sg); + continue; + default: + ret = -EREMOTEIO; goto out_unmap; } sg_dma_len(sg) = sg->length; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 051a32988040..107e4a4d251d 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -443,6 +443,24 @@ bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr) } EXPORT_SYMBOL_GPL(__dma_need_sync); +/** + * dma_need_unmap - does this device need dma_unmap_* operations + * @dev: device to check + * + * If this function returns %false, drivers can skip calling dma_unmap_* after + * finishing an I/O. This function must be called after all mappings that might + * need to be unmapped have been performed. + */ +bool dma_need_unmap(struct device *dev) +{ + if (!dma_map_direct(dev, get_dma_ops(dev))) + return true; + if (!dev->dma_skip_sync) + return true; + return IS_ENABLED(CONFIG_DMA_API_DEBUG); +} +EXPORT_SYMBOL_GPL(dma_need_unmap); + static void dma_setup_need_sync(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); |