summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2025-09-06 22:52:54 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-09-09 18:58:22 +0200
commit607b2bf5708fe657c85271edc4dc4226323068f9 (patch)
tree8088c95a97cc40d1c551915cb228c50553153baa
parentdda6ec365ab04067adae40ef17015db447e90736 (diff)
nouveau: fix disabling the nonstall irq due to storm code
[ Upstream commit 0ef5c4e4dbbfcebaa9b2eca18097b43016727dfe ] Nouveau has code that when it gets an IRQ with no allowed handler it disables it to avoid storms. However with nonstall interrupts, we often disable them from the drm driver, but still request their emission via the push submission. Just don't disable nonstall irqs ever in normal operation, the event handling code will filter them out, and the driver will just enable/disable them at load time. This fixes timeouts we've been seeing on/off for a long time, but they became a lot more noticeable on Blackwell. This doesn't fix all of them, there is a subsequent fence emission fix to fix the last few. Fixes: 3ebd64aa3c4f ("drm/nouveau/intr: support multiple trees, and explicit interfaces") Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie <airlied@redhat.com> Link: https://lore.kernel.org/r/20250829021633.1674524-1-airlied@gmail.com [ Fix a typo and a minor checkpatch.pl warning; remove "v2" from commit subject. - Danilo ] Signed-off-by: Danilo Krummrich <dakr@kernel.org> [ Apply to drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c ] Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c1
5 files changed, 21 insertions, 8 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 22443fe4a39f..d0092b293090 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -352,6 +352,8 @@ nvkm_fifo_dtor(struct nvkm_engine *engine)
mutex_destroy(&fifo->userd.mutex);
nvkm_event_fini(&fifo->nonstall.event);
+ if (fifo->func->nonstall_dtor)
+ fifo->func->nonstall_dtor(fifo);
mutex_destroy(&fifo->mutex);
if (fifo->func->dtor)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
index e74493a4569e..6848a56f20c0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
@@ -517,19 +517,11 @@ ga100_fifo_nonstall_intr(struct nvkm_inth *inth)
static void
ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index)
{
- struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
- struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0);
-
- nvkm_inth_block(&runl->nonstall.inth);
}
static void
ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index)
{
- struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
- struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0);
-
- nvkm_inth_allow(&runl->nonstall.inth);
}
const struct nvkm_event_func
@@ -564,12 +556,26 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo)
if (ret)
return ret;
+ nvkm_inth_allow(&runl->nonstall.inth);
+
nr = max(nr, runl->id + 1);
}
return nr;
}
+void
+ga100_fifo_nonstall_dtor(struct nvkm_fifo *fifo)
+{
+ struct nvkm_runl *runl;
+
+ nvkm_runl_foreach(runl, fifo) {
+ if (runl->nonstall.vector < 0)
+ continue;
+ nvkm_inth_block(&runl->nonstall.inth);
+ }
+}
+
int
ga100_fifo_runl_ctor(struct nvkm_fifo *fifo)
{
@@ -599,6 +605,7 @@ ga100_fifo = {
.runl_ctor = ga100_fifo_runl_ctor,
.mmu_fault = &tu102_fifo_mmu_fault,
.nonstall_ctor = ga100_fifo_nonstall_ctor,
+ .nonstall_dtor = ga100_fifo_nonstall_dtor,
.nonstall = &ga100_fifo_nonstall,
.runl = &ga100_runl,
.runq = &ga100_runq,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
index 755235f55b3a..18a0b1f4eab7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
@@ -30,6 +30,7 @@ ga102_fifo = {
.runl_ctor = ga100_fifo_runl_ctor,
.mmu_fault = &tu102_fifo_mmu_fault,
.nonstall_ctor = ga100_fifo_nonstall_ctor,
+ .nonstall_dtor = ga100_fifo_nonstall_dtor,
.nonstall = &ga100_fifo_nonstall,
.runl = &ga100_runl,
.runq = &ga100_runq,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
index a0f3277605a5..c5ecbcae2967 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
@@ -40,6 +40,7 @@ struct nvkm_fifo_func {
void (*start)(struct nvkm_fifo *, unsigned long *);
int (*nonstall_ctor)(struct nvkm_fifo *);
+ void (*nonstall_dtor)(struct nvkm_fifo *);
const struct nvkm_event_func *nonstall;
const struct nvkm_runl_func *runl;
@@ -198,6 +199,7 @@ extern const struct nvkm_fifo_func_mmu_fault tu102_fifo_mmu_fault;
int ga100_fifo_runl_ctor(struct nvkm_fifo *);
int ga100_fifo_nonstall_ctor(struct nvkm_fifo *);
+void ga100_fifo_nonstall_dtor(struct nvkm_fifo *);
extern const struct nvkm_event_func ga100_fifo_nonstall;
extern const struct nvkm_runl_func ga100_runl;
extern const struct nvkm_runq_func ga100_runq;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
index 3454c7d29502..f978e49a4d54 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
@@ -660,6 +660,7 @@ r535_fifo_new(const struct nvkm_fifo_func *hw, struct nvkm_device *device,
rm->chan.func = &r535_chan;
rm->nonstall = &ga100_fifo_nonstall;
rm->nonstall_ctor = ga100_fifo_nonstall_ctor;
+ rm->nonstall_dtor = ga100_fifo_nonstall_dtor;
return nvkm_fifo_new_(rm, device, type, inst, pfifo);
}