diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 27e35066a6029c864bc9e77bd30dd75ce6d3ec9c..892278f12dce904a8f58f77a254271337ab1d509 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -120,11 +120,26 @@ struct {
 } cpu_ctx_stor SEC(".maps");
 
 /* Statistics */
-u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued;
+u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq;
 u64 nr_core_sched_execed;
 u32 cpuperf_min, cpuperf_avg, cpuperf_max;
 u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
 
+static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu)
+{
+	s32 cpu;
+
+	if (p->nr_cpus_allowed == 1 ||
+	    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
+		return prev_cpu;
+
+	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
+	if (cpu >= 0)
+		return cpu;
+
+	return -1;
+}
+
 s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
 		   s32 prev_cpu, u64 wake_flags)
 {
@@ -137,17 +152,14 @@ s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
 		return -ESRCH;
 	}
 
-	if (p->nr_cpus_allowed == 1 ||
-	    scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
+	cpu = pick_direct_dispatch_cpu(p, prev_cpu);
+
+	if (cpu >= 0) {
 		tctx->force_local = true;
+		return cpu;
+	} else {
 		return prev_cpu;
 	}
-
-	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
-	if (cpu >= 0)
-		return cpu;
-
-	return prev_cpu;
 }
 
 static int weight_to_idx(u32 weight)
@@ -172,6 +184,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 	u32 pid = p->pid;
 	int idx = weight_to_idx(p->scx.weight);
 	void *ring;
+	s32 cpu;
 
 	if (p->flags & PF_KTHREAD) {
 		if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
@@ -207,6 +220,14 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 		return;
 	}
 
+	/* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
+	if (!(enq_flags & SCX_ENQ_WAKEUP) &&
+	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
+		__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
+		scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
+		return;
+	}
+
 	/*
 	 * If the task was re-enqueued due to the CPU being preempted by a
 	 * higher priority scheduling class, just re-enqueue the task directly
diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
index 304f0488a386b8d2c64c8404183b9f324d9e991c..c9ca30d62b2b19df5ba9c730a94510b07c1930c1 100644
--- a/tools/sched_ext/scx_qmap.c
+++ b/tools/sched_ext/scx_qmap.c
@@ -116,10 +116,11 @@ int main(int argc, char **argv)
 		long nr_enqueued = skel->bss->nr_enqueued;
 		long nr_dispatched = skel->bss->nr_dispatched;
 
-		printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64"\n",
+		printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n",
 		       nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
 		       skel->bss->nr_reenqueued, skel->bss->nr_dequeued,
-		       skel->bss->nr_core_sched_execed);
+		       skel->bss->nr_core_sched_execed,
+		       skel->bss->nr_ddsp_from_enq);
 		if (__COMPAT_has_ksym("scx_bpf_cpuperf_cur"))
 			printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n",
 			       skel->bss->cpuperf_min,