Skip to content

Commit 4f7995c

Browse files
Andrey Grodzovsky, gregkh
Andrey Grodzovsky
authored and committed
drm/sched: Avoid lockdep splat on killing a process
[ Upstream commit 542cff7 ] Problem: Signaling one sched fence from within another sched fence's signal callback generates a lockdep splat because both have the same lockdep class for their fence->lock. Fix: Fix the stack below by deferring to irq work the signaling and killing of jobs that are left when the entity is killed. [11176.741181] dump_stack+0x10/0x12 [11176.741186] __lock_acquire.cold+0x208/0x2df [11176.741197] lock_acquire+0xc6/0x2d0 [11176.741204] ? dma_fence_signal+0x28/0x80 [11176.741212] _raw_spin_lock_irqsave+0x4d/0x70 [11176.741219] ? dma_fence_signal+0x28/0x80 [11176.741225] dma_fence_signal+0x28/0x80 [11176.741230] drm_sched_fence_finished+0x12/0x20 [gpu_sched] [11176.741240] drm_sched_entity_kill_jobs_cb+0x1c/0x50 [gpu_sched] [11176.741248] dma_fence_signal_timestamp_locked+0xac/0x1a0 [11176.741254] dma_fence_signal+0x3b/0x80 [11176.741260] drm_sched_fence_finished+0x12/0x20 [gpu_sched] [11176.741268] drm_sched_job_done.isra.0+0x7f/0x1a0 [gpu_sched] [11176.741277] drm_sched_job_done_cb+0x12/0x20 [gpu_sched] [11176.741284] dma_fence_signal_timestamp_locked+0xac/0x1a0 [11176.741290] dma_fence_signal+0x3b/0x80 [11176.741296] amdgpu_fence_process+0xd1/0x140 [amdgpu] [11176.741504] sdma_v4_0_process_trap_irq+0x8c/0xb0 [amdgpu] [11176.741731] amdgpu_irq_dispatch+0xce/0x250 [amdgpu] [11176.741954] amdgpu_ih_process+0x81/0x100 [amdgpu] [11176.742174] amdgpu_irq_handler+0x26/0xa0 [amdgpu] [11176.742393] __handle_irq_event_percpu+0x4f/0x2c0 [11176.742402] handle_irq_event_percpu+0x33/0x80 [11176.742408] handle_irq_event+0x39/0x60 [11176.742414] handle_edge_irq+0x93/0x1d0 [11176.742419] __common_interrupt+0x50/0xe0 [11176.742426] common_interrupt+0x80/0x90 Signed-off-by: Andrey Grodzovsky <[email protected]> Suggested-by: Daniel Vetter <[email protected]> Suggested-by: Christian König <[email protected]> Tested-by: Christian König <[email protected]> Reviewed-by: Christian König <[email protected]> Link: https://www.spinics.net/lists/dri-devel/msg321250.html 
Signed-off-by: Sasha Levin <[email protected]>
1 parent 0406b8b commit 4f7995c

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

drivers/gpu/drm/scheduler/sched_entity.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,16 +190,25 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
190190
}
191191
EXPORT_SYMBOL(drm_sched_entity_flush);
192192

193+
static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
194+
{
195+
struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
196+
197+
drm_sched_fence_finished(job->s_fence);
198+
WARN_ON(job->s_fence->parent);
199+
job->sched->ops->free_job(job);
200+
}
201+
202+
193203
/* Signal the scheduler finished fence when the entity in question is killed. */
194204
static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
195205
struct dma_fence_cb *cb)
196206
{
197207
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
198208
finish_cb);
199209

200-
drm_sched_fence_finished(job->s_fence);
201-
WARN_ON(job->s_fence->parent);
202-
job->sched->ops->free_job(job);
210+
init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
211+
irq_work_queue(&job->work);
203212
}
204213

205214
static struct dma_fence *

include/drm/gpu_scheduler.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <linux/dma-fence.h>
2929
#include <linux/completion.h>
3030
#include <linux/xarray.h>
31+
#include <linux/irq_work.h>
3132

3233
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
3334

@@ -286,7 +287,16 @@ struct drm_sched_job {
286287
struct list_head list;
287288
struct drm_gpu_scheduler *sched;
288289
struct drm_sched_fence *s_fence;
289-
struct dma_fence_cb finish_cb;
290+
291+
/*
292+
* work is used only after finish_cb has been used and will not be
293+
* accessed anymore.
294+
*/
295+
union {
296+
struct dma_fence_cb finish_cb;
297+
struct irq_work work;
298+
};
299+
290300
uint64_t id;
291301
atomic_t karma;
292302
enum drm_sched_priority s_priority;

0 commit comments

Comments
 (0)