The changes introduced by commit 2fe205008e9b70c67a9f3502831074ff36b00093 lead to a system lockup and possible kernel panic after 15-45 minutes when using Firefox. The system hangs with the Caps Lock LED flashing, and no traces of this are left in the logs. Using another machine to monitor dmesg -w over ssh also shows no error, and a serial console is not available.
The easiest way to trigger this error is Civilization VI, where usually one or two turns are sufficient. The error is still present in linux-next-20220930. The following patch is sufficient to fix the error (at least it seems that way so far):
diff --git b/drivers/gpu/drm/scheduler/sched_main.c a/drivers/gpu/drm/scheduler/sched_main.c
index 4f2395d1a791..e5a4ecde0063 100644
--- b/drivers/gpu/drm/scheduler/sched_main.c
+++ a/drivers/gpu/drm/scheduler/sched_main.c
@@ -829,7 +829,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 	job = list_first_entry_or_null(&sched->pending_list,
 				       struct drm_sched_job, list);
 
-	if (job && dma_fence_is_signaled(job->s_fence->parent)) {
+	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
 		/* remove job from pending_list */
 		list_del_init(&job->list);
 
@@ -841,7 +841,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 		if (next) {
 			next->s_fence->scheduled.timestamp =
-				job->s_fence->parent->timestamp;
+				job->s_fence->finished.timestamp;
 
 			/* start TO timer for next job */
 			drm_sched_start_timeout(sched);
 		}
This was also the cause of the instability I mentioned in issue #2170 (closed).
This could be related: https://lkml.org/lkml/2022/10/6/825
Posting this here because I failed to reply to the thread on LKML (my replies always started a new thread, and I don't want to spam the list).
This still leads to a lockup of the screen when playing Civilization VI; the difference is that magic SysRq now works to sync and reboot. These are the last messages I get:
Oct 8 11:44:33 lisa kernel: [ 276.181882] Warning: s_job->s_fence == NULL in drm_sched_job_done
Oct 8 11:44:40 lisa kernel: [ 283.426667] amdgpu 0000:03:00.0: amdgpu: free PSP TMR buffer
Oct 8 11:44:48 lisa kernel: [ 291.363289] pci_bus 0000:03: Allocating resources
Oct 8 11:44:48 lisa kernel: [ 291.363532] [drm] PCIE GART of 512M enabled (table at 0x00000081FEB00000).
Oct 8 11:44:48 lisa kernel: [ 291.363548] [drm] PSP is resuming...
Oct 8 11:44:49 lisa kernel: [ 291.534210] [drm] reserve 0xa00000 from 0x81fd000000 for PSP TMR
Oct 8 11:44:49 lisa kernel: [ 291.604079] amdgpu 0000:03:00.0: amdgpu: RAS: optional ras ta ucode is not available
Oct 8 11:44:49 lisa kernel: [ 291.613214] amdgpu 0000:03:00.0: amdgpu: SECUREDISPLAY: securedisplay ta ucode is not available
Oct 8 11:44:49 lisa kernel: [ 291.613217] amdgpu 0000:03:00.0: amdgpu: SMU is resuming...
Oct 8 11:44:49 lisa kernel: [ 291.613220] amdgpu 0000:03:00.0: amdgpu: smu driver if version = 0x0000000f, smu fw if version = 0x00000012, smu fw program = 0, version = 0x003b2200 (59.34.0)
Oct 8 11:44:49 lisa kernel: [ 291.613223] amdgpu 0000:03:00.0: amdgpu: SMU driver if version not matched
Oct 8 11:44:49 lisa kernel: [ 291.613271] amdgpu 0000:03:00.0: amdgpu: use vbios provided pptable
Oct 8 11:44:49 lisa kernel: [ 291.664722] amdgpu 0000:03:00.0: amdgpu: SMU is resumed successfully!
Oct 8 11:44:49 lisa kernel: [ 291.666208] [drm] DMUB hardware initialized: version=0x02020003
Oct 8 11:44:49 lisa kernel: [ 291.704538] [drm] kiq ring mec 2 pipe 1 q 0
Oct 8 11:44:49 lisa kernel: [ 291.708839] [drm] VCN decode and encode initialized successfully(under DPG Mode).
Oct 8 11:44:49 lisa kernel: [ 291.709179] [drm] JPEG decode initialized successfully.
Oct 8 11:44:49 lisa kernel: [ 291.709203] amdgpu 0000:03:00.0: amdgpu: ring gfx_0.0.0 uses VM inv eng 0 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709205] amdgpu 0000:03:00.0: amdgpu: ring comp_1.0.0 uses VM inv eng 1 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709206] amdgpu 0000:03:00.0: amdgpu: ring comp_1.1.0 uses VM inv eng 4 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709206] amdgpu 0000:03:00.0: amdgpu: ring comp_1.2.0 uses VM inv eng 5 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709207] amdgpu 0000:03:00.0: amdgpu: ring comp_1.3.0 uses VM inv eng 6 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709207] amdgpu 0000:03:00.0: amdgpu: ring comp_1.0.1 uses VM inv eng 7 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709208] amdgpu 0000:03:00.0: amdgpu: ring comp_1.1.1 uses VM inv eng 8 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709209] amdgpu 0000:03:00.0: amdgpu: ring comp_1.2.1 uses VM inv eng 9 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709209] amdgpu 0000:03:00.0: amdgpu: ring comp_1.3.1 uses VM inv eng 10 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709210] amdgpu 0000:03:00.0: amdgpu: ring kiq_2.1.0 uses VM inv eng 11 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709210] amdgpu 0000:03:00.0: amdgpu: ring sdma0 uses VM inv eng 12 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709211] amdgpu 0000:03:00.0: amdgpu: ring sdma1 uses VM inv eng 13 on hub 0
Oct 8 11:44:49 lisa kernel: [ 291.709212] amdgpu 0000:03:00.0: amdgpu: ring vcn_dec_0 uses VM inv eng 0 on hub 1
Oct 8 11:44:49 lisa kernel: [ 291.709212] amdgpu 0000:03:00.0: amdgpu: ring vcn_enc_0.0 uses VM inv eng 1 on hub 1
Oct 8 11:44:49 lisa kernel: [ 291.709213] amdgpu 0000:03:00.0: amdgpu: ring vcn_enc_0.1 uses VM inv eng 4 on hub 1
Oct 8 11:44:49 lisa kernel: [ 291.709214] amdgpu 0000:03:00.0: amdgpu: ring jpeg_dec uses VM inv eng 5 on hub 1
Oct 8 11:44:49 lisa kernel: [ 291.712440] amdgpu 0000:03:00.0: [drm] Cannot find any crtc or sizes
Oct 8 11:44:49 lisa kernel: [ 291.712443] amdgpu 0000:03:00.0: [drm] Cannot find any crtc or sizes
Oct 8 11:44:54 lisa kernel: [ 296.999333] amdgpu 0000:03:00.0: amdgpu: free PSP TMR buffer
Oct 8 11:45:06 lisa kernel: [ 309.157791] sysrq: Emergency Sync
Oct 8 11:45:06 lisa kernel: [ 309.158017] Emergency Sync complete
Edit: One can also use ssh to kill processes, but the screen is still stuck (even after gdm3 is killed).
I think this was caused by a race between the callback function of the parent fence (drm_sched_job_done) and drm_sched_job_cleanup, which was called too early because dma_fence_is_signaled only checks the signaled bit, not whether the callbacks have returned.
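For reference, the mainline helper (include/linux/dma-fence.h, kerneldoc omitted): it returns true as soon as DMA_FENCE_FLAG_SIGNALED_BIT is set, and mainline sets that bit (via test_and_set_bit in dma_fence_signal_timestamp_locked) before walking the callback list:

static inline bool
dma_fence_is_signaled(struct dma_fence *fence)
{
	/* Only the signaled bit is tested; the signaler may still be
	 * walking the callback list when this returns true. */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return true;

	if (fence->ops->signaled && fence->ops->signaled(fence)) {
		dma_fence_signal(fence);
		return true;
	}

	return false;
}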
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 4cc59bae38dd..cd4364b32f6a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -920,7 +920,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 				       struct drm_sched_job, list);
 
 	if (job && (!job->s_fence->parent ||
-		    dma_fence_is_signaled(job->s_fence->parent))) {
+		    dma_fence_is_unlocked_and_signaled(job->s_fence->parent))) {
 		/* remove job from pending_list */
 		list_del_init(&job->list);
 
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 775cdc0b4f24..9c1efb6a8ddb 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -439,6 +439,24 @@ dma_fence_is_signaled(struct dma_fence *fence)
 	return false;
 }
 
+static inline bool
+dma_fence_is_unlocked_and_signaled(struct dma_fence *fence)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+		if (unlikely(spin_is_locked(fence->lock)))
+			return false;
+		else
+			return true;
+	}
+
+	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+		dma_fence_signal(fence);
+		return true;
+	}
+
+	return false;
+}
+
 /**
  * __dma_fence_is_later - return if f1 is chronologically later than f2
  * @f1: the first fence's seqno
Edit: Checking the lock should be done after checking the bit here. Before the signaled bit is set, the lock being held or free says nothing about the callbacks; once the bit is observed set, a free lock implies the signaler has finished running them.
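For context, a simplified sketch of the signaling path (drivers/dma-buf/dma-fence.c, lockdep annotations omitted), which is what makes the spin_is_locked() check above meaningful: fence->lock is held from before the signaled bit is set until after the last callback has returned.

int dma_fence_signal(struct dma_fence *fence)
{
	unsigned long flags;
	int ret;

	if (!fence)
		return -EINVAL;

	/* The lock is held across the whole of
	 * dma_fence_signal_timestamp_locked(), i.e. across setting the
	 * signaled bit and running every callback, so observing the bit
	 * set while the lock is free implies the callbacks are done. */
	spin_lock_irqsave(fence->lock, flags);
	ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
	spin_unlock_irqrestore(fence->lock, flags);

	return ret;
}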
One could also move setting DMA_FENCE_FLAG_SIGNALED_BIT to the end of dma_fence_signal_timestamp_locked (not tested yet; the first fix seems okay so far):
int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
				      ktime_t timestamp)
{
	struct dma_fence_cb *cur, *tmp;
	struct list_head cb_list;

	lockdep_assert_held(fence->lock);

	if (unlikely(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
		return -EINVAL;

	/* Stash the cb_list before replacing it with the timestamp */
	list_replace(&fence->cb_list, &cb_list);

	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);

	list_for_each_entry_safe(cur, tmp, &cb_list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}

	/* moved: set the signaled bit only after all callbacks have returned */
	set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);

	return 0;
}
Putting the set_bit after the callbacks might slow things down, so one could instead introduce another flag bit in dma_fence that is set after the callbacks have returned:
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 406b4e26f538..37e00edc1c7e 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -387,6 +387,8 @@ int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
 		cur->func(fence, cur);
 	}
 
+	set_bit(DMA_FENCE_FLAG_SIGNALED_AND_CALLBACKS_RETURNED_BIT, &fence->flags);
+
 	return 0;
 }
 EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 4cc59bae38dd..4bb90154722b 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -920,7 +920,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 				       struct drm_sched_job, list);
 
 	if (job && (!job->s_fence->parent ||
-		    dma_fence_is_signaled(job->s_fence->parent))) {
+		    dma_fence_is_signaled_and_callbacks_returned(job->s_fence->parent))) {
 		/* remove job from pending_list */
 		list_del_init(&job->list);
 
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 775cdc0b4f24..943e81deb99e 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
 	DMA_FENCE_FLAG_SIGNALED_BIT,
 	DMA_FENCE_FLAG_TIMESTAMP_BIT,
 	DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+	DMA_FENCE_FLAG_SIGNALED_AND_CALLBACKS_RETURNED_BIT,
 	DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
 };
@@ -439,6 +440,22 @@ dma_fence_is_signaled(struct dma_fence *fence)
 	return false;
 }
 
+
+static inline bool
+dma_fence_is_signaled_and_callbacks_returned(struct dma_fence *fence)
+{
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_AND_CALLBACKS_RETURNED_BIT, &fence->flags))
+		return true;
+
+	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+		dma_fence_signal(fence);
+		return true;
+	}
+
+	return false;
+}
+
+
 /**
  * __dma_fence_is_later - return if f1 is chronologically later than f2
  * @f1: the first fence's seqno