author     Thomas Gleixner <tglx@linutronix.de>    2019-08-15 11:10:38 +0200
committer  Thomas Gleixner <tglx@linutronix.de>    2019-08-15 11:10:38 +0200
commit     4511708b9a044f2bc83c7c7f7f8a2c45ec488219 (patch)
tree       e892580504b082e9852f5925ba66d1e22910f5e8 /drivers/gpu/drm/msm/msm_gem.c
parent     7f06d0aa530cc61cb5e048d4eb34921dee12058a (diff)
parent     1cd8fa288eb83c1fe0dfa492b09d228a8d802fbf (diff)
Merge tag 'perf-core-for-mingo-5.4-20190814' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo:
Intel PT:
Adrian Hunter:
- Add PEBS via Intel PT support, the kernel bits went via PeterZ.
perf record:
Alexander Shishkin:
- Add an option to take an AUX snapshot on exit.
Tan Xiaojun:
- Support random socket_id assignment on aarch64, mirroring the earlier S/390 fix.
tools:
Andy Shevchenko:
- Keep list of tools in alphabetical order on 'make -C tools help'.
perf session:
Arnaldo Carvalho de Melo:
- Avoid infinite loop when seeing invalid header.size, reported by
Vince Weaver using a perf.data fuzzer.
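    The failure mode is worth sketching: each perf.data record declares its
    own size, so a fuzzed size of zero (or smaller than the header itself)
    means the reader never advances. Below is a minimal C sketch of a reader
    loop with the guard that makes it terminate; the struct and function
    names are illustrative, not perf's actual internals:

        #include <stddef.h>
        #include <stdint.h>
        #include <stdio.h>

        /* Shape of a perf.data-style record header (illustrative). */
        struct rec_header {
                uint32_t type;
                uint16_t misc;
                uint16_t size;  /* total record size, header included */
        };

        /*
         * Walk records in a mapped buffer. Without the size check, a
         * record claiming size == 0 (or less than its own header) would
         * leave 'off' unchanged and the loop would spin forever.
         */
        static int walk_records(const unsigned char *buf, size_t len)
        {
                size_t off = 0;

                while (off + sizeof(struct rec_header) <= len) {
                        const struct rec_header *h = (const void *)(buf + off);

                        if (h->size < sizeof(struct rec_header) ||
                            off + h->size > len) {
                                fprintf(stderr, "invalid header.size %u at offset %zu\n",
                                        (unsigned)h->size, off);
                                return -1;  /* bail out instead of looping */
                        }
                        off += h->size;  /* guaranteed forward progress */
                }
                return 0;
        }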
Documentation:
Vince Weaver:
- Clarify HEADER_SAMPLE_TOPOLOGY format in the perf.data spec.
perf config:
Arnaldo Carvalho de Melo:
- Honour $PERF_CONFIG env var to specify alternate .perfconfig.
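    A minimal sketch of the lookup order this describes; config_path() is a
    hypothetical helper for illustration, not perf's internal API:

        #define _GNU_SOURCE
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        /* $PERF_CONFIG wins; otherwise fall back to ~/.perfconfig. */
        static char *config_path(void)
        {
                const char *env = getenv("PERF_CONFIG");
                const char *home;
                char *path;

                if (env && *env)
                        return strdup(env);

                home = getenv("HOME");
                if (!home)
                        return NULL;

                if (asprintf(&path, "%s/.perfconfig", home) < 0)
                        return NULL;
                return path;
        }

    Requiring a non-empty value means an empty PERF_CONFIG= falls through to
    the default, while pointing it at an alternate file (or /dev/null)
    overrides the per-user config.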
perf test:
Arnaldo Carvalho de Melo:
- Disable ~/.perfconfig to get default output in 'perf trace' tests.
perf top:
Arnaldo Carvalho de Melo:
- Set display thread COMM to help with debugging.
- Collapse and resort evsels in a group, so that we have output
  similar to 'perf report' when using event groups, i.e.
      perf top -e '{cycles,instructions}'
  will have two columns, and the 'instructions' one will work.
core:
Igor Lubashev:
- Detect if libcap development files are available so that we
can use capabilities to match the checks made by the kernel instead
of using plain (geteuid() == 0).
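    A sketch of what such a capability check can look like when libcap is
    present, falling back to the plain euid test otherwise; the
    HAVE_LIBCAP_SUPPORT guard and has_cap() helper here are illustrative
    assumptions, not perf's exact code:

        #include <stdbool.h>
        #include <unistd.h>
        #ifdef HAVE_LIBCAP_SUPPORT
        #include <sys/capability.h>
        #endif

        /*
         * Prefer checking an effective capability, which is what the
         * kernel actually tests; geteuid() == 0 is the blunt fallback.
         */
        static bool has_cap(int cap)
        {
        #ifdef HAVE_LIBCAP_SUPPORT
                cap_flag_value_t val = CAP_CLEAR;
                cap_t caps = cap_get_proc();

                if (!caps)
                        return false;
                if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val) != 0)
                        val = CAP_CLEAR;
                cap_free(caps);
                return val == CAP_SET;
        #else
                return geteuid() == 0;  /* plain root check without libcap */
        #endif
        }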
Intel:
Haiyan Song:
- Add Icelake V1.00 event file.
perf trace:
Leo Yan:
- Fix a segmentation fault when accessing syscall info on arm64.
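    The log does not spell out the root cause; as a generic illustration of
    the defensive pattern this class of crash calls for -- validating a
    syscall id before indexing the (possibly sparse) name table -- here is a
    sketch in which the table and helper are hypothetical:

        #include <stddef.h>

        /* Hypothetical syscall-name table; the real one is generated per arch. */
        static const char *syscall_names[] = {
                [0] = "io_setup",
                [1] = "io_destroy",
                /* ... */
        };

        #define NR_SYSCALLS (sizeof(syscall_names) / sizeof(syscall_names[0]))

        /*
         * An id outside the table, or a hole in a sparse table, must
         * yield NULL rather than a wild dereference.
         */
        static const char *syscall_name(int id)
        {
                if (id < 0 || (size_t)id >= NR_SYSCALLS)
                        return NULL;
                return syscall_names[id];
        }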
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/msm/msm_gem.c')
-rw-r--r--  drivers/gpu/drm/msm/msm_gem.c | 47
1 file changed, 42 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index c2114c748c2f..8cf6362e64bf 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -32,6 +32,46 @@ static bool use_pages(struct drm_gem_object *obj)
 	return !msm_obj->vram_node;
 }
 
+/*
+ * Cache sync.. this is a bit over-complicated, to fit dma-mapping
+ * API. Really GPU cache is out of scope here (handled on cmdstream)
+ * and all we need to do is invalidate newly allocated pages before
+ * mapping to CPU as uncached/writecombine.
+ *
+ * On top of this, we have the added headache, that depending on
+ * display generation, the display's iommu may be wired up to either
+ * the toplevel drm device (mdss), or to the mdp sub-node, meaning
+ * that here we either have dma-direct or iommu ops.
+ *
+ * Let this be a cautionary tail of abstraction gone wrong.
+ */
+
+static void sync_for_device(struct msm_gem_object *msm_obj)
+{
+	struct device *dev = msm_obj->base.dev->dev;
+
+	if (get_dma_ops(dev)) {
+		dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
+			msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+	} else {
+		dma_map_sg(dev, msm_obj->sgt->sgl,
+			msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+	}
+}
+
+static void sync_for_cpu(struct msm_gem_object *msm_obj)
+{
+	struct device *dev = msm_obj->base.dev->dev;
+
+	if (get_dma_ops(dev)) {
+		dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
+			msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+	} else {
+		dma_unmap_sg(dev, msm_obj->sgt->sgl,
+			msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+	}
+}
+
 /* allocate pages from VRAM carveout, used when no IOMMU: */
 static struct page **get_pages_vram(struct drm_gem_object *obj, int npages)
 {
@@ -97,8 +137,7 @@ static struct page **get_pages(struct drm_gem_object *obj)
 		 * because display controller, GPU, etc. are not coherent:
 		 */
 		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-			dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
-					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+			sync_for_device(msm_obj);
 	}
 
 	return msm_obj->pages;
@@ -127,9 +166,7 @@ static void put_pages(struct drm_gem_object *obj)
 		 * GPU, etc. are not coherent:
 		 */
 		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-			dma_sync_sg_for_cpu(obj->dev->dev, msm_obj->sgt->sgl,
-					msm_obj->sgt->nents,
-					DMA_BIDIRECTIONAL);
+			sync_for_cpu(msm_obj);
 
 	sg_free_table(msm_obj->sgt);
 	kfree(msm_obj->sgt);