Commit b0a9499c, authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] sched: add new SCHED_BATCH policy

Add a new SCHED_BATCH (3) scheduling policy: such tasks are presumed
CPU-intensive, and will acquire a constant +5 priority level penalty.  Such
policy is nice for workloads that are non-interactive, but which do not
want to give up their nice levels.  The policy is also useful for workloads
that want a deterministic scheduling policy without interactivity causing
extra preemptions (between that workload's tasks).
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 2d0cfb52
...@@ -160,6 +160,7 @@ extern unsigned long nr_iowait(void); ...@@ -160,6 +160,7 @@ extern unsigned long nr_iowait(void);
#define SCHED_NORMAL 0 #define SCHED_NORMAL 0
#define SCHED_FIFO 1 #define SCHED_FIFO 1
#define SCHED_RR 2 #define SCHED_RR 2
#define SCHED_BATCH 3
struct sched_param { struct sched_param {
int sched_priority; int sched_priority;
...@@ -470,9 +471,9 @@ struct signal_struct { ...@@ -470,9 +471,9 @@ struct signal_struct {
/* /*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT * Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL tasks are * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
* in the range MAX_RT_PRIO..MAX_PRIO-1. Priority values * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
* are inverted: lower p->prio value means higher priority. * values are inverted: lower p->prio value means higher priority.
* *
* The MAX_USER_RT_PRIO value allows the actual maximum * The MAX_USER_RT_PRIO value allows the actual maximum
* RT priority to be separate from the value exported to * RT priority to be separate from the value exported to
......
...@@ -244,7 +244,9 @@ static inline void reparent_to_init(void) ...@@ -244,7 +244,9 @@ static inline void reparent_to_init(void)
/* Set the exit signal to SIGCHLD so we signal init on exit */ /* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD; current->exit_signal = SIGCHLD;
if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0)) if ((current->policy == SCHED_NORMAL ||
current->policy == SCHED_BATCH)
&& (task_nice(current) < 0))
set_user_nice(current, 0); set_user_nice(current, 0);
/* cpus_allowed? */ /* cpus_allowed? */
/* rt_priority? */ /* rt_priority? */
......
...@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now) ...@@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
unsigned long long __sleep_time = now - p->timestamp; unsigned long long __sleep_time = now - p->timestamp;
unsigned long sleep_time; unsigned long sleep_time;
if (__sleep_time > NS_MAX_SLEEP_AVG) if (unlikely(p->policy == SCHED_BATCH))
sleep_time = NS_MAX_SLEEP_AVG; sleep_time = 0;
else else {
sleep_time = (unsigned long)__sleep_time; if (__sleep_time > NS_MAX_SLEEP_AVG)
sleep_time = NS_MAX_SLEEP_AVG;
else
sleep_time = (unsigned long)__sleep_time;
}
if (likely(sleep_time > 0)) { if (likely(sleep_time > 0)) {
/* /*
...@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice) ...@@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice)
* The RT priorities are set via sched_setscheduler(), but we still * The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected * allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is * it wont have any effect on scheduling until the task is
* not SCHED_NORMAL: * not SCHED_NORMAL/SCHED_BATCH:
*/ */
if (rt_task(p)) { if (rt_task(p)) {
p->static_prio = NICE_TO_PRIO(nice); p->static_prio = NICE_TO_PRIO(nice);
...@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) ...@@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
BUG_ON(p->array); BUG_ON(p->array);
p->policy = policy; p->policy = policy;
p->rt_priority = prio; p->rt_priority = prio;
if (policy != SCHED_NORMAL) if (policy != SCHED_NORMAL && policy != SCHED_BATCH) {
p->prio = MAX_RT_PRIO-1 - p->rt_priority; p->prio = MAX_RT_PRIO-1 - p->rt_priority;
else } else {
p->prio = p->static_prio; p->prio = p->static_prio;
/*
* SCHED_BATCH tasks are treated as perpetual CPU hogs:
*/
if (policy == SCHED_BATCH)
p->sleep_avg = 0;
}
} }
/** /**
...@@ -3733,29 +3743,35 @@ int sched_setscheduler(struct task_struct *p, int policy, ...@@ -3733,29 +3743,35 @@ int sched_setscheduler(struct task_struct *p, int policy,
if (policy < 0) if (policy < 0)
policy = oldpolicy = p->policy; policy = oldpolicy = p->policy;
else if (policy != SCHED_FIFO && policy != SCHED_RR && else if (policy != SCHED_FIFO && policy != SCHED_RR &&
policy != SCHED_NORMAL) policy != SCHED_NORMAL && policy != SCHED_BATCH)
return -EINVAL; return -EINVAL;
/* /*
* Valid priorities for SCHED_FIFO and SCHED_RR are * Valid priorities for SCHED_FIFO and SCHED_RR are
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
* SCHED_BATCH is 0.
*/ */
if (param->sched_priority < 0 || if (param->sched_priority < 0 ||
(p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
(!p->mm && param->sched_priority > MAX_RT_PRIO-1)) (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
return -EINVAL; return -EINVAL;
if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
!= (param->sched_priority == 0))
return -EINVAL; return -EINVAL;
/* /*
* Allow unprivileged RT tasks to decrease priority: * Allow unprivileged RT tasks to decrease priority:
*/ */
if (!capable(CAP_SYS_NICE)) { if (!capable(CAP_SYS_NICE)) {
/* can't change policy */ /*
if (policy != p->policy && * can't change policy, except between SCHED_NORMAL
!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) * and SCHED_BATCH:
*/
if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) &&
(policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) &&
!p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
return -EPERM; return -EPERM;
/* can't increase priority */ /* can't increase priority */
if (policy != SCHED_NORMAL && if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
param->sched_priority > p->rt_priority && param->sched_priority > p->rt_priority &&
param->sched_priority > param->sched_priority >
p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) p->signal->rlim[RLIMIT_RTPRIO].rlim_cur)
...@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) ...@@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
ret = MAX_USER_RT_PRIO-1; ret = MAX_USER_RT_PRIO-1;
break; break;
case SCHED_NORMAL: case SCHED_NORMAL:
case SCHED_BATCH:
ret = 0; ret = 0;
break; break;
} }
...@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy) ...@@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy)
ret = 1; ret = 1;
break; break;
case SCHED_NORMAL: case SCHED_NORMAL:
case SCHED_BATCH:
ret = 0; ret = 0;
} }
return ret; return ret;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment