diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:04:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:04:15 -0700 |
commit | faa392181a0bd42c5478175cef601adeecdc91b6 (patch) | |
tree | e020e1142e34786676d0cd40f539bccdbb66099e /drivers/gpu/drm/i915/i915_request.c | |
parent | cfa3b8068b09f25037146bfd5eed041b78878bee (diff) | |
parent | 9ca1f474cea0edc14a1d7ec933e5472c0ff115d3 (diff) |
Merge tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Highlights:
- Core DRM had a lot of refactoring around managed drm resources to
make drivers simpler.
- Intel Tigerlake support is on by default
- amdgpu now support p2p PCI buffer sharing and encrypted GPU memory
Details:
core:
- uapi: error out EBUSY when existing master
- uapi: rework SET/DROP MASTER permission handling
- remove drm_pci.h
- drm_pci* are now legacy
- introduced managed DRM resources
- subclassing support for drm_framebuffer
- simple encoder helper
- edid improvements
- vblank + writeback documentation improved
- drm/mm - optimise tree searches
- port drivers to use devm_drm_dev_alloc
dma-buf:
- add flag for p2p buffer support
mst:
- ACT timeout improvements
- remove drm_dp_mst_has_audio
- don't use 2nd TX slot - spec recommends against it
bridge:
- dw-hdmi various improvements
- chrontel ch7033 support
- fix stack issues with old gcc
hdmi:
- add unpack function for drm infoframe
fbdev:
- misc fbdev driver fixes
i915:
- uapi: global sseu pinning
- uapi: OA buffer polling
- uapi: remove generated perf code
- uapi: per-engine default property values in sysfs
- Tigerlake GEN12 enabled.
- Lots of gem refactoring
- Tigerlake enablement patches
- move to drm_device logging
- Icelake gamma HW readout
- push MST link retrain to hotplug work
- bandwidth atomic helpers
- ICL fixes
- RPS/GT refactoring
- Cherryview full-ppgtt support
- i915 locking guidelines documented
- require linear fb stride to be 512 multiple on gen9
- Tigerlake SAGV support
amdgpu:
- uapi: encrypted GPU memory handling
- uapi: add MEM_SYNC IB flag
- p2p dma-buf support
- export VRAM dma-bufs
- FRU chip access support
- RAS/SR-IOV updates
- Powerplay locking fixes
- VCN DPG (powergating) enablement
- GFX10 clockgating fixes
- DC fixes
- GPU reset fixes
- navi SDMA fix
- expose FP16 for modesetting
- DP 1.4 compliance fixes
- gfx10 soft recovery
- Improved Critical Thermal Faults handling
- resizable BAR on gmc10
amdkfd:
- uapi: GWS resource management
- track GPU memory per process
- report PCI domain in topology
radeon:
- safe reg list generator fixes
nouveau:
- HD audio fixes on recent systems
- vGPU detection (fail probe if we're on one, for now)
- Interlaced mode fixes (mostly avoidance on Turing, which doesn't support it)
- SVM improvements/fixes
- NVIDIA format modifier support
- Misc other fixes.
adv7511:
- HDMI SPDIF support
ast:
- allocate crtc state size
- fix double assignment
- fix suspend
bochs:
- drop connector register
cirrus:
- move to tiny drivers.
exynos:
- fix imported dma-buf mapping
- enable runtime PM
- fixes and cleanups
mediatek:
- DPI pin mode swap
- config mipi_tx current/impedance
lima:
- devfreq + cooling device support
- task handling improvements
- runtime PM support
pl111:
- vexpress init improvements
- fix module auto-load
rcar-du:
- DT bindings conversion to YAML
- Planes zpos sanity check and fix
- MAINTAINERS entry for LVDS panel driver
mcde:
- fix return value
mgag200:
- use managed config init
stm:
- read endpoints from DT
vboxvideo:
- use PCI managed functions
- drop WC mtrr
vkms:
- enable cursor by default
rockchip:
- afbc support
virtio:
- various cleanups
qxl:
- fix cursor notify port
hisilicon:
- 128-byte stride alignment fix
sun4i:
- improved format handling"
* tag 'drm-next-2020-06-02' of git://anongit.freedesktop.org/drm/drm: (1401 commits)
drm/amd/display: Fix potential integer wraparound resulting in a hang
drm/amd/display: drop cursor position check in atomic test
drm/amdgpu: fix device attribute node create failed with multi gpu
drm/nouveau: use correct conflicting framebuffer API
drm/vblank: Fix -Wformat compile warnings on some arches
drm/amdgpu: Sync with VM root BO when switching VM to CPU update mode
drm/amd/display: Handle GPU reset for DC block
drm/amdgpu: add apu flags (v2)
drm/amd/powerpay: Disable gfxoff when setting manual mode on picasso and raven
drm/amdgpu: fix pm sysfs node handling (v2)
drm/amdgpu: move gpu_info parsing after common early init
drm/amdgpu: move discovery gfx config fetching
drm/nouveau/dispnv50: fix runtime pm imbalance on error
drm/nouveau: fix runtime pm imbalance on error
drm/nouveau: fix runtime pm imbalance on error
drm/nouveau/debugfs: fix runtime pm imbalance on error
drm/nouveau/nouveau/hmm: fix migrate zero page to GPU
drm/nouveau/nouveau/hmm: fix nouveau_dmem_chunk allocations
drm/nouveau/kms/nv50-: Share DP SST mode_valid() handling with MST
drm/nouveau/kms/nv50-: Move 8BPC limit for MST into nv50_mstc_get_modes()
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c | 144 |
1 files changed, 95 insertions, 49 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e2b78db685eae..526c1e9acbd58 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -23,6 +23,7 @@ */ #include <linux/dma-fence-array.h> +#include <linux/dma-fence-chain.h> #include <linux/irq_work.h> #include <linux/prefetch.h> #include <linux/sched.h> @@ -101,6 +102,11 @@ static signed long i915_fence_wait(struct dma_fence *fence, timeout); } +struct kmem_cache *i915_request_slab_cache(void) +{ + return global.slab_requests; +} + static void i915_fence_release(struct dma_fence *fence) { struct i915_request *rq = to_request(fence); @@ -115,6 +121,10 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->submit); i915_sw_fence_fini(&rq->semaphore); + /* Keep one request on each engine for reserved use under mempressure */ + if (!cmpxchg(&rq->engine->request_pool, NULL, rq)) + return; + kmem_cache_free(global.slab_requests, rq); } @@ -358,8 +368,6 @@ __await_execution(struct i915_request *rq, } spin_unlock_irq(&signal->lock); - /* Copy across semaphore status as we need the same behaviour */ - rq->sched.flags |= signal->sched.flags; return 0; } @@ -527,10 +535,8 @@ void __i915_request_unsubmit(struct i915_request *request) spin_unlock(&request->lock); /* We've already spun, don't charge on resubmitting. */ - if (request->sched.semaphores && i915_request_started(request)) { - request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE; + if (request->sched.semaphores && i915_request_started(request)) request->sched.semaphores = 0; - } /* * We don't need to wake_up any waiters on request->execute, they @@ -588,15 +594,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static void irq_semaphore_cb(struct irq_work *wrk) -{ - struct i915_request *rq = - container_of(wrk, typeof(*rq), semaphore_work); - - i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); - i915_request_put(rq); -} - static int __i915_sw_fence_call semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -604,11 +601,6 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: - if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { - i915_request_get(rq); - init_irq_work(&rq->semaphore_work, irq_semaphore_cb); - irq_work_queue(&rq->semaphore_work); - } break; case FENCE_FREE: @@ -629,14 +621,22 @@ static void retire_requests(struct intel_timeline *tl) } static noinline struct i915_request * -request_alloc_slow(struct intel_timeline *tl, gfp_t gfp) +request_alloc_slow(struct intel_timeline *tl, + struct i915_request **rsvd, + gfp_t gfp) { struct i915_request *rq; - if (list_empty(&tl->requests)) - goto out; + /* If we cannot wait, dip into our reserves */ + if (!gfpflags_allow_blocking(gfp)) { + rq = xchg(rsvd, NULL); + if (!rq) /* Use the normal failure path for one final WARN */ + goto out; - if (!gfpflags_allow_blocking(gfp)) + return rq; + } + + if (list_empty(&tl->requests)) goto out; /* Move our oldest request to the slab-cache (if not in use!) */ @@ -721,7 +721,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq = kmem_cache_alloc(global.slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = request_alloc_slow(tl, gfp); + rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; @@ -933,6 +933,7 @@ __emit_semaphore_wait(struct i915_request *to, u32 *cs; GEM_BUG_ON(INTEL_GEN(to->i915) < 8); + GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); /* We need to pin the signaler's HWSP until we are finished reading. */ err = intel_timeline_read_hwsp(from, to, &hwsp_offset); @@ -978,13 +979,26 @@ emit_semaphore_wait(struct i915_request *to, gfp_t gfp) { const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; + struct i915_sw_fence *wait = &to->submit; if (!intel_context_use_semaphores(to->context)) goto await_fence; + if (i915_request_has_initial_breadcrumb(to)) + goto await_fence; + if (!rcu_access_pointer(from->hwsp_cacheline)) goto await_fence; + /* + * If this or its dependents are waiting on an external fence + * that may fail catastrophically, then we want to avoid using + * sempahores as they bypass the fence signaling metadata, and we + * lose the fence->error propagation. + */ + if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) + goto await_fence; + /* Just emit the first semaphore we see as request space is limited. */ if (already_busywaiting(to) & mask) goto await_fence; @@ -1000,11 +1014,10 @@ emit_semaphore_wait(struct i915_request *to, goto await_fence; to->sched.semaphores |= mask; - to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; - return 0; + wait = &to->semaphore; await_fence: - return i915_sw_fence_await_dma_fence(&to->submit, + return i915_sw_fence_await_dma_fence(wait, &from->fence, 0, I915_FENCE_GFP); } @@ -1039,15 +1052,56 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) if (ret < 0) return ret; - if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) { - ret = i915_sw_fence_await_dma_fence(&to->semaphore, - &from->fence, 0, - I915_FENCE_GFP); - if (ret < 0) - return ret; + return 0; +} + +static void mark_external(struct i915_request *rq) +{ + /* + * The downside of using semaphores is that we lose metadata passing + * along the signaling chain. This is particularly nasty when we + * need to pass along a fatal error such as EFAULT or EDEADLK. For + * fatal errors we want to scrub the request before it is executed, + * which means that we cannot preload the request onto HW and have + * it wait upon a semaphore. + */ + rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; +} + +static int +__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + mark_external(rq); + return i915_sw_fence_await_dma_fence(&rq->submit, fence, + i915_fence_context_timeout(rq->i915, + fence->context), + I915_FENCE_GFP); +} + +static int +i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + struct dma_fence *iter; + int err = 0; + + if (!to_dma_fence_chain(fence)) + return __i915_request_await_external(rq, fence); + + dma_fence_chain_for_each(iter, fence) { + struct dma_fence_chain *chain = to_dma_fence_chain(iter); + + if (!dma_fence_is_i915(chain->fence)) { + err = __i915_request_await_external(rq, iter); + break; + } + + err = i915_request_await_dma_fence(rq, chain->fence); + if (err < 0) + break; } - return 0; + dma_fence_put(iter); + return err; } int @@ -1097,9 +1151,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) if (dma_fence_is_i915(fence)) ret = i915_request_await_request(rq, to_request(fence)); else - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - fence->context ? I915_FENCE_TIMEOUT : 0, - I915_FENCE_GFP); + ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; @@ -1179,7 +1231,8 @@ __i915_request_await_execution(struct i915_request *to, * immediate execution, and so we must wait until it reaches the * active slot. */ - if (intel_engine_has_semaphores(to->engine)) { + if (intel_engine_has_semaphores(to->engine) && + !i915_request_has_initial_breadcrumb(to)) { err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); if (err < 0) return err; @@ -1225,6 +1278,9 @@ i915_request_await_execution(struct i915_request *rq, continue; } + if (fence->context == rq->fence.context) + continue; + /* * We don't squash repeated fence dependencies here as we * want to run our callback in all cases. @@ -1235,9 +1291,7 @@ i915_request_await_execution(struct i915_request *rq, to_request(fence), hook); else - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - I915_FENCE_TIMEOUT, - GFP_KERNEL); + ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; } while (--nchild); @@ -1445,14 +1499,7 @@ void i915_request_add(struct i915_request *rq) attr = ctx->sched; rcu_read_unlock(); - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; - - local_bh_disable(); __i915_request_queue(rq, &attr); - local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ mutex_unlock(&tl->mutex); } @@ -1636,7 +1683,6 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) intel_rps_boost(rq); - i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); } wait.tsk = current; |