kernel/sysctl.c | 4
4 files changed, 293 insertions(+), 183 deletions(-)
-Index: linux-2.6.35-bfs/include/linux/sched.h
+Index: kernel-2.6.28/include/linux/sched.h
===================================================================
---- linux-2.6.35-bfs.orig/include/linux/sched.h 2010-09-25 08:18:08.792894602 +1000
-+++ linux-2.6.35-bfs/include/linux/sched.h 2010-09-25 08:20:25.822886826 +1000
-@@ -1118,7 +1118,7 @@ struct task_struct {
+--- kernel-2.6.28.orig/include/linux/sched.h
++++ kernel-2.6.28/include/linux/sched.h
+@@ -1121,8 +1121,8 @@ struct task_struct {
int prio, static_prio, normal_prio;
unsigned int rt_priority;
#ifdef CONFIG_SCHED_BFS
+ u64 deadline;
struct list_head run_list;
u64 last_ran;
-@@ -1547,7 +1547,7 @@ static inline void tsk_cpus_current(stru
+ u64 sched_time; /* sched_clock time spent running */
+@@ -1426,7 +1426,7 @@ static inline void tsk_cpus_current(stru
static inline void print_scheduler_version(void)
{
}
static inline int iso_task(struct task_struct *p)
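The first hunk adds a per-task u64 deadline under CONFIG_SCHED_BFS. In BFS every SCHED_NORMAL task carries a virtual deadline and the scheduler favours whichever runnable task's deadline comes first; deadline_before() further down in this patch is nothing more than a u64 less-than. Below is a minimal userspace sketch of that selection rule; struct task and the sample values are made up for illustration and only stand in for task_struct.

#include <stdint.h>
#include <stdio.h>

struct task {
	const char *comm;
	uint64_t deadline;	/* virtual deadline, in scheduler time units */
};

/* Mirrors the patch's deadline_before(): a plain u64 comparison. */
static int deadline_before(uint64_t deadline, uint64_t time)
{
	return deadline < time;
}

/* Pick the runnable task whose virtual deadline is earliest. */
static struct task *earliest_deadline_task(struct task *tasks, int nr)
{
	struct task *edt = &tasks[0];
	int i;

	for (i = 1; i < nr; i++)
		if (deadline_before(tasks[i].deadline, edt->deadline))
			edt = &tasks[i];
	return edt;
}

int main(void)
{
	struct task queued[] = {
		{ "make",    5000 },
		{ "mpd",     1200 },
		{ "kworker", 3400 },
	};

	printf("next: %s\n", earliest_deadline_task(queued, 3)->comm);
	return 0;
}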
-Index: linux-2.6.35-bfs/kernel/sched_bfs.c
+Index: kernel-2.6.28/kernel/sched_bfs.c
===================================================================
---- linux-2.6.35-bfs.orig/kernel/sched_bfs.c 2010-09-25 08:18:08.804894864 +1000
-+++ linux-2.6.35-bfs/kernel/sched_bfs.c 2010-09-25 08:20:25.827886935 +1000
-@@ -106,10 +106,19 @@
+--- kernel-2.6.28.orig/kernel/sched_bfs.c
++++ kernel-2.6.28/kernel/sched_bfs.c
+@@ -102,10 +102,19 @@
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
#define SCHED_PRIO(p) ((p)+MAX_RT_PRIO)
+
+#define RESCHED_US (100) /* Reschedule if less than this many us left */
+ #ifdef CONFIG_SMP
/*
- * This is the time all tasks within the same priority round robin.
-@@ -140,8 +149,9 @@ static inline unsigned long timeslice(vo
+@@ -157,8 +166,9 @@ static inline unsigned long timeslice(vo
}
/*
+ * struct.
*/
struct global_rq {
- raw_spinlock_t lock;
-@@ -150,17 +160,17 @@ struct global_rq {
+ spinlock_t lock;
+@@ -167,17 +177,17 @@ struct global_rq {
unsigned long long nr_switches;
struct list_head queue[PRIO_LIMIT];
DECLARE_BITMAP(prio_bitmap, PRIO_LIMIT + 1);
};
/* There can be only one */
-@@ -176,8 +186,8 @@ struct rq {
- u64 nohz_stamp;
+@@ -192,8 +202,8 @@ struct rq {
+ #ifdef CONFIG_NO_HZ
unsigned char in_nohz_recently;
#endif
+ struct task_struct *last_task;
struct task_struct *curr, *idle;
struct mm_struct *prev_mm;
-@@ -213,9 +223,11 @@ struct rq {
+@@ -229,9 +239,11 @@ struct rq {
/* See if all cache siblings are idle */
cpumask_t cache_siblings;
#endif
#ifdef CONFIG_SCHEDSTATS
/* latency stats */
-@@ -290,12 +290,4 @@ struct root_domain {
+@@ -290,14 +302,6 @@ struct root_domain {
static struct root_domain def_root_domain;
#endif
- return 0;
-#endif
-}
-
-@@ -310,17 +313,65 @@ static inline int cpu_of(struct rq *rq)
+
+ /*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+@@ -309,17 +313,65 @@ static inline int cpu_of(struct rq *rq)
#define for_each_domain(cpu, __sd) \
- for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+ for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+static inline void update_rq_clock(struct rq *rq);
+
+ grq.niffies += ndiff;
+}
#endif
- #define raw_rq() (&__raw_get_cpu_var(runqueues))
-@@ -335,13 +386,13 @@ static struct rq *uprq;
+ #include "sched_stats.h"
+@@ -333,13 +385,13 @@ static struct rq *uprq;
/*
* All common locking functions performed on grq.lock. rq->clock is local to
+ * when we're not updating niffies.
+ * Looking up task_rq must be done under grq.lock to be safe.
*/
--static inline void update_rq_clock(struct rq *rq)
-+static inline void update_rq_clock(struct rq *rq)
+ static inline void update_rq_clock(struct rq *rq)
{
- if (!rq->skip_clock_update)
- rq->clock = sched_clock_cpu(cpu_of(rq));
}
static inline int task_running(struct task_struct *p)
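Several fragments above and below (the "grq.niffies += ndiff" line, the new "grq.niffies = 0" in sched_init(), update_clocks() replacing update_rq_clock() in a couple of callers) point at the other big change here: a single monotonic counter, niffies, kept in the global runqueue and advanced under grq.lock from each CPU's local clock. The exact bookkeeping is elided in this excerpt; the sketch below is only a userspace model of the idea, with old_clock and last_niffy as illustrative field names. Each CPU contributes the delta since its own last update, minus whatever other CPUs have added in the meantime, so the shared counter tracks elapsed time once rather than once per CPU and never runs backwards.

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 2

/* Global scheduler time, standing in for grq.niffies (updated under grq.lock). */
static uint64_t niffies;

struct cpu_rq {
	uint64_t clock;		/* this CPU's local clock, may drift */
	uint64_t old_clock;	/* local clock at our last niffies update */
	uint64_t last_niffy;	/* niffies value at our last update */
};

static struct cpu_rq rqs[NR_CPUS];

/* Model of update_clocks(): fold one CPU's progress into the shared counter. */
static void update_clocks(struct cpu_rq *rq, uint64_t now)
{
	int64_t ndiff;

	rq->clock = now;
	ndiff = (int64_t)(rq->clock - rq->old_clock);
	rq->old_clock = rq->clock;
	/* Subtract whatever other CPUs contributed since we last updated. */
	ndiff -= (int64_t)(niffies - rq->last_niffy);
	if (ndiff > 0)
		niffies += ndiff;	/* the "grq.niffies += ndiff" step */
	rq->last_niffy = niffies;
}

int main(void)
{
	/* Two CPUs report slightly drifted local clocks. */
	update_clocks(&rqs[0], 1000);
	update_clocks(&rqs[1], 990);
	update_clocks(&rqs[0], 2000);
	update_clocks(&rqs[1], 2050);

	/* niffies tracks elapsed time once, not once per CPU. */
	printf("niffies = %llu\n", (unsigned long long)niffies);
	return 0;
}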
-@@ -370,8 +421,8 @@ static inline void grq_lock_irq(void)
+@@ -368,8 +420,8 @@ static inline void grq_lock_irq(void)
static inline void time_lock_grq(struct rq *rq)
__acquires(grq.lock)
{
}
static inline void grq_unlock_irq(void)
-@@ -405,7 +456,7 @@ static inline struct rq
+@@ -403,7 +455,7 @@ static inline struct rq
__acquires(grq.lock)
{
struct rq *rq = task_grq_lock(p, flags);
return rq;
}
-@@ -420,7 +471,7 @@ static inline void time_task_grq_lock_ir
+@@ -418,7 +470,7 @@ static inline void time_task_grq_lock_ir
__acquires(grq.lock)
{
struct rq *rq = task_grq_lock_irq(p);
}
static inline void task_grq_unlock_irq(void)
-@@ -515,33 +566,6 @@ static inline void finish_lock_switch(st
+@@ -513,33 +565,6 @@ static inline void finish_lock_switch(st
}
#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
static inline int deadline_before(u64 deadline, u64 time)
{
return (deadline < time);
-@@ -574,17 +598,6 @@ static void dequeue_task(struct task_str
+@@ -572,17 +597,6 @@ static void dequeue_task(struct task_str
}
/*
* To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as
* an idle task, we ensure none of the following conditions are met.
*/
-@@ -646,11 +659,11 @@ static inline int task_prio_ratio(struct
+@@ -644,11 +658,11 @@ static inline int task_prio_ratio(struct
/*
* task_timeslice - all tasks of all priorities get the exact same timeslice
* length. CPU distribution is handled by giving different deadlines to
}
#ifdef CONFIG_SMP
-@@ -702,6 +715,15 @@ static int suitable_idle_cpus(struct tas
+@@ -700,6 +714,15 @@ static int suitable_idle_cpus(struct tas
static void resched_task(struct task_struct *p);
#define CPUIDLE_CACHE_BUSY (1)
#define CPUIDLE_DIFF_CPU (2)
#define CPUIDLE_THREAD_BUSY (4)
-@@ -724,6 +746,9 @@ static void resched_task(struct task_str
+@@ -722,6 +745,9 @@ static void resched_task(struct task_str
* Other node, other CPU, idle cache, idle threads.
* Other node, other CPU, busy cache, idle threads.
* Other node, other CPU, busy threads.
*/
static void resched_best_idle(struct task_struct *p)
{
-@@ -756,11 +781,14 @@ static void resched_best_idle(struct tas
+@@ -754,11 +780,14 @@ static void resched_best_idle(struct tas
tmp_rq = cpu_rq(cpu_tmp);
if (rq->cpu_locality[cpu_tmp]) {
}
#ifdef CONFIG_SCHED_MC
if (!(tmp_rq->cache_idle(cpu_tmp)))
-@@ -802,6 +830,11 @@ static inline void resched_suitable_idle
+@@ -800,6 +829,11 @@ static inline void resched_suitable_idle
static inline int
cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
{
return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
}
#else /* CONFIG_SMP */
-@@ -840,6 +873,10 @@ cache_distance(struct rq *task_rq, struc
+@@ -839,6 +873,10 @@ cache_distance(struct rq *task_rq, struc
{
return 0;
}
#endif /* CONFIG_SMP */
/*
-@@ -887,7 +924,7 @@ static int effective_prio(struct task_st
+@@ -886,7 +924,7 @@ static int effective_prio(struct task_st
*/
static void activate_task(struct task_struct *p, struct rq *rq)
{
/*
* Sleep time is in units of nanosecs, so shift by 20 to get a
-@@ -1157,8 +1194,28 @@ EXPORT_SYMBOL_GPL(kick_process);
+@@ -1157,8 +1195,28 @@ void kick_process(struct task_struct *p)
#endif
#define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT)
#ifdef CONFIG_HOTPLUG_CPU
/*
* Check to see if there is a task that is affined only to offline CPUs but
-@@ -1178,14 +1235,20 @@ static inline int online_cpus(struct tas
- #endif
+@@ -1179,14 +1237,20 @@ static inline int online_cpus(struct tas
+
/*
- * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the
static void try_preempt(struct task_struct *p, struct rq *this_rq)
{
struct rq *highest_prio_rq = this_rq;
-@@ -1193,6 +1256,10 @@ static void try_preempt(struct task_stru
+@@ -1194,6 +1258,10 @@ static void try_preempt(struct task_stru
int highest_prio;
cpumask_t tmp;
if (suitable_idle_cpus(p)) {
resched_best_idle(p);
return;
-@@ -1219,30 +1286,32 @@ static void try_preempt(struct task_stru
+@@ -1220,30 +1288,32 @@ static void try_preempt(struct task_stru
offset_deadline = rq->rq_deadline -
cache_distance(this_rq, rq, p);
resched_task(uprq->curr);
}
#endif /* CONFIG_SMP */
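The try_preempt() hunks compare the waking task's deadline against what each CPU is currently running, but first discount each candidate's deadline by cache_distance(), shown earlier as rq->cpu_locality[...] * task_timeslice(p). A CPU that is further away in cache or NUMA terms therefore looks more urgent and is harder to preempt. The sketch below models only that deadline/locality comparison; the locality table and numbers are hypothetical, and the real function also deals with RT priorities, affinity masks and the idle-CPU fast path shown above.

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 2

/* Hypothetical locality table: 0 = shares cache, larger = further away. */
static const int cpu_locality[NR_CPUS][NR_CPUS] = {
	{ 0, 1 },
	{ 1, 0 },
};

struct cpu {
	uint64_t curr_deadline;		/* deadline of the task now running */
};

/* As in the patch: locality cost scaled by the waking task's timeslice. */
static uint64_t cache_distance(int from, int to, uint64_t timeslice)
{
	return (uint64_t)cpu_locality[from][to] * timeslice;
}

/*
 * Find the CPU whose running task looks least urgent once locality has been
 * charged against its deadline, and preempt it only if the waking task's
 * deadline is earlier still. Returns the CPU to reschedule, or -1.
 */
static int try_preempt(int this_cpu, struct cpu *cpus, uint64_t p_deadline,
		       uint64_t p_timeslice)
{
	uint64_t latest = 0;
	int target = -1, cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		uint64_t offset = cpus[cpu].curr_deadline -
			cache_distance(this_cpu, cpu, p_timeslice);

		if (target < 0 || offset > latest) {
			latest = offset;
			target = cpu;
		}
	}
	return p_deadline < latest ? target : -1;
}

int main(void)
{
	struct cpu cpus[NR_CPUS] = { { 6000 }, { 6500 } };

	/* The remote CPU's later deadline is discounted by its cache distance. */
	printf("preempt cpu %d\n", try_preempt(0, cpus, 5000, 1000));
	return 0;
}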
-@@ -1352,12 +1421,15 @@ int wake_up_state(struct task_struct *p,
+@@ -1331,12 +1401,15 @@ int wake_up_state(struct task_struct *p,
return try_to_wake_up(p, state, 0);
}
int cpu = get_cpu();
struct rq *rq;
-@@ -1396,10 +1468,11 @@ void sched_fork(struct task_struct *p, i
+@@ -1376,10 +1449,11 @@ void sched_fork(struct task_struct *p, i
p->sched_reset_on_fork = 0;
}
+ curr = current;
/*
- * Make sure we do not leak PI boosting priority to the child.
+ * Make sure we do not leak PI boosting priority to the child:
*/
- p->prio = current->normal_prio;
+ p->prio = curr->normal_prio;
INIT_LIST_HEAD(&p->run_list);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
-@@ -1420,18 +1493,26 @@ void sched_fork(struct task_struct *p, i
+@@ -1400,18 +1474,26 @@ void sched_fork(struct task_struct *p, i
* total amount of pending timeslices in the system doesn't change,
* resulting in more scheduling fairness. If it's negative, it won't
* matter since that's the same as being 0. current's time_slice is
task_grq_unlock_irq();
out:
put_cpu();
-@@ -1470,40 +1551,9 @@ void wake_up_new_task(struct task_struct
+@@ -1452,40 +1534,9 @@ void wake_up_new_task(struct task_struct
task_grq_unlock(&flags);
}
}
#ifdef CONFIG_PREEMPT_NOTIFIERS
-@@ -1981,7 +2031,7 @@ update_cpu_clock(struct rq *rq, struct t
+@@ -1900,7 +1951,7 @@ update_cpu_clock(struct rq *rq, struct t
else if (unlikely(time_diff > JIFFIES_TO_NS(1)))
time_diff = JIFFIES_TO_NS(1);
}
rq->rq_last_ran = rq->timekeep_clock = rq->clock;
}
-@@ -1997,7 +2047,7 @@ static u64 do_task_delta_exec(struct tas
+@@ -1916,7 +1967,7 @@ static u64 do_task_delta_exec(struct tas
u64 ns = 0;
if (p == rq->curr) {
ns = rq->clock - rq->rq_last_ran;
if (unlikely((s64)ns < 0))
ns = 0;
-@@ -2171,10 +2221,22 @@ void account_idle_ticks(unsigned long ti
+@@ -2090,10 +2141,22 @@ void account_idle_ticks(unsigned long ti
}
#endif
* because the occasional inaccurate result won't matter. However the
* tick data is only ever modified under lock. iso_refractory is only simply
* set to 0 or 1 so it's not worth grabbing the lock yet again for that.
-@@ -2209,21 +2271,21 @@ static unsigned int test_ret_isorefracto
+@@ -2128,21 +2191,21 @@ static unsigned int test_ret_isorefracto
static void iso_tick(void)
{
}
}
-@@ -2262,10 +2324,23 @@ static void task_running_tick(struct rq
+@@ -2181,10 +2244,23 @@ static void task_running_tick(struct rq
}
/* SCHED_FIFO tasks never run out of timeslice. */
p = rq->curr;
requeue_task(p);
grq_lock();
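RESCHED_US, added near the top of sched_bfs.c, turns timeslice expiry from "has the slice hit zero" into "is less than 100 microseconds left", which makes sense once accounting happens at sub-jiffy resolution: a leftover that small is not worth dispatching the task again for. The exact test in the tick path is elided from this excerpt; the sketch below only shows the intended comparison and assumes the remaining slice is tracked in microseconds, as the unit in the comment suggests.

#include <stdio.h>

#define RESCHED_US	100	/* reschedule if less than this many us left */

/*
 * With sub-jiffy accounting the remaining slice rarely reaches exactly zero,
 * so anything under RESCHED_US is treated as spent.
 */
static int slice_expired(long time_slice_us)
{
	return time_slice_us < RESCHED_US;
}

int main(void)
{
	printf("%d %d %d\n",
	       slice_expired(5000),	/* plenty left: keep running */
	       slice_expired(60),	/* under 100 us: reschedule */
	       slice_expired(-20));	/* overran: reschedule */
	return 0;
}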
-@@ -2286,13 +2361,14 @@ void scheduler_tick(void)
+@@ -2205,13 +2281,14 @@ void scheduler_tick(void)
struct rq *rq = cpu_rq(cpu);
sched_clock_tick();
else
no_iso_tick();
+ rq->last_tick = rq->clock;
- perf_event_task_tick(rq->curr);
}
-@@ -2354,7 +2430,7 @@ EXPORT_SYMBOL(sub_preempt_count);
+ #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+@@ -2273,7 +2350,7 @@ EXPORT_SYMBOL(sub_preempt_count);
#endif
/*
* is the key to everything. It distributes cpu fairly amongst tasks of the
* same nice value, it proportions cpu according to nice level, it means the
* task that last woke up the longest ago has the earliest deadline, thus
-@@ -2364,7 +2440,7 @@ EXPORT_SYMBOL(sub_preempt_count);
+@@ -2283,7 +2360,7 @@ EXPORT_SYMBOL(sub_preempt_count);
*/
static inline int prio_deadline_diff(int user_prio)
{
}
static inline int task_deadline_diff(struct task_struct *p)
-@@ -2377,25 +2453,33 @@ static inline int static_deadline_diff(i
+@@ -2296,25 +2373,33 @@ static inline int static_deadline_diff(i
return prio_deadline_diff(USER_PRIO(static_prio));
}
time_slice_expired(p);
}
-@@ -2433,7 +2517,7 @@ retry:
+@@ -2352,7 +2437,7 @@ retry:
queue = grq.queue + idx;
list_for_each_entry(p, queue, run_list) {
/* Make sure cpu affinity is ok */
continue;
if (idx < MAX_RT_PRIO) {
/* We found an rt task */
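This hunk is part of the earliest-deadline lookup: the caller walks grq's priority bitmap, and within the first occupied level scans the list for a task whose affinity allows the current CPU; RT levels are taken in strict priority order, while the shared SCHED_NORMAL level is resolved by earliest virtual deadline. A compact userspace model of that two-stage pick follows; it uses a flat array instead of grq.queue[] plus prio_bitmap, and NORMAL_LEVEL/PRIO_LIMIT are illustrative placements rather than the kernel's values.

#include <stdint.h>
#include <stdio.h>

#define MAX_RT_PRIO	100			/* as in the kernel */
#define NORMAL_LEVEL	MAX_RT_PRIO		/* first non-RT level in this model */
#define PRIO_LIMIT	(NORMAL_LEVEL + 3)	/* illustrative, not the kernel value */

struct task {
	const char *comm;
	int prio;		/* queue index: RT priority or NORMAL_LEVEL */
	uint64_t deadline;	/* only meaningful for non-RT tasks */
	unsigned cpus_allowed;	/* affinity mask, bit n = CPU n */
};

/* Flat stand-in for grq.queue[]/prio_bitmap: all queued tasks in one array. */
static struct task *pick_next(struct task *tasks, int nr, int cpu)
{
	struct task *edt = NULL;
	int idx, i;

	for (idx = 0; idx < PRIO_LIMIT; idx++) {
		for (i = 0; i < nr; i++) {
			struct task *p = &tasks[i];

			if (p->prio != idx || !(p->cpus_allowed & (1u << cpu)))
				continue;	/* wrong level or wrong CPU */
			if (idx < MAX_RT_PRIO)
				return p;	/* RT: strict priority order */
			if (!edt || p->deadline < edt->deadline)
				edt = p;	/* NORMAL: earliest deadline */
		}
		if (edt)
			return edt;
	}
	return NULL;
}

int main(void)
{
	struct task queued[] = {
		{ "normal-late",  NORMAL_LEVEL, 9000, 0x3 },
		{ "normal-early", NORMAL_LEVEL, 4000, 0x3 },
		{ "pinned-cpu1",  NORMAL_LEVEL, 1000, 0x2 },
	};

	/* pinned-cpu1 has the earliest deadline but is not allowed on CPU 0. */
	printf("cpu0 runs: %s\n", pick_next(queued, 3, 0)->comm);
	return 0;
}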
-@@ -2560,12 +2644,14 @@ need_resched_nonpreemptible:
+@@ -2479,12 +2564,14 @@ need_resched_nonpreemptible:
deactivate = 0;
schedule_debug(prev);
clear_tsk_need_resched(prev);
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-@@ -2581,36 +2667,54 @@ need_resched_nonpreemptible:
+@@ -2500,35 +2587,53 @@ need_resched_nonpreemptible:
prev->time_slice = rq->rq_time_slice;
prev->deadline = rq->rq_deadline;
check_deadline(prev);
if (likely(prev != next)) {
sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next);
+ if (prev != idle)
+ set_last_task(rq, prev);
set_rq_task(rq, next);
grq.nr_switches++;
prev->oncpu = 0;
-@@ -2629,6 +2733,7 @@ need_resched_nonpreemptible:
+@@ -2547,10 +2652,15 @@ need_resched_nonpreemptible:
} else
grq_unlock_irq();
+- if (unlikely(reacquire_kernel_lock(current) < 0))
+rerun_prev_unlocked:
- if (unlikely(reacquire_kernel_lock(current) < 0)) {
- prev = rq->curr;
- switch_count = &prev->nivcsw;
-@@ -3324,8 +3429,9 @@ int task_prio(const struct task_struct *
++ if (unlikely(reacquire_kernel_lock(current) < 0)) {
++ prev = rq->curr;
++ switch_count = &prev->nivcsw;
+ goto need_resched_nonpreemptible;
++ }
++
+ preempt_enable_no_resched();
+- if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
++ if (need_resched())
+ goto need_resched;
+ }
+ EXPORT_SYMBOL(schedule);
+@@ -3066,8 +3176,9 @@ int task_prio(const struct task_struct *
if (prio <= 0)
goto out;
if (delta > 0 && delta <= 80)
prio += delta;
if (idleprio_task(p))
-@@ -3533,7 +3639,7 @@ recheck:
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+@@ -3266,7 +3377,7 @@ recheck:
+ policy = oldpolicy = -1;
goto recheck;
}
- update_rq_clock(rq);
p->sched_reset_on_fork = reset_on_fork;
queued = task_queued(p);
-@@ -4835,7 +4941,7 @@ migration_call(struct notifier_block *nf
+@@ -4453,7 +4564,7 @@ migration_call(struct notifier_block *nf
__setscheduler(idle, rq, SCHED_NORMAL, 0);
idle->prio = PRIO_LIMIT;
set_rq_task(rq, idle);
- update_rq_clock(rq);
+ update_clocks(rq);
grq_unlock_irq();
+ cpuset_unlock();
break;
-
-@@ -6531,12 +6637,14 @@ void __init sched_init(void)
+@@ -5982,12 +6093,14 @@ void __init sched_init(void)
int i;
struct rq *rq;
for (i = 1 ; i < PRIO_RANGE ; i++)
prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;
- raw_spin_lock_init(&grq.lock);
+ spin_lock_init(&grq.lock);
grq.nr_running = grq.nr_uninterruptible = grq.nr_switches = 0;
+ grq.niffies = 0;
+ raw_spin_lock_init(&grq.iso_lock);
grq.iso_ticks = grq.iso_refractory = 0;
#ifdef CONFIG_SMP
init_defrootdomain();
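sched_init() seeds prio_ratios[] with an 11/10 geometric progression, so each nice level costs roughly ten percent more deadline distance than the one before it, and prio_deadline_diff() (its body is elided above) scales the base rr_interval by the task's ratio when placing its virtual deadline. The sketch below prints the resulting table; the base value of 128 and the 6 ms interval are assumptions not visible in this excerpt, and the offset formula is only an approximation of the real calculation.

#include <stdio.h>

#define PRIO_RANGE	40	/* nice -20 .. +19 */

static int prio_ratios[PRIO_RANGE];

int main(void)
{
	int i, rr_interval_ms = 6;	/* assumed default interval */

	prio_ratios[0] = 128;		/* assumed base value, not in this excerpt */
	for (i = 1; i < PRIO_RANGE; i++)
		prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;

	for (i = 0; i < PRIO_RANGE; i += 10) {
		/* Rough deadline offset: the interval scaled by the nice ratio. */
		long offset_us = (long)rr_interval_ms * 1000 *
				 prio_ratios[i] / 128;

		printf("nice %+3d  ratio %4d  deadline offset ~%ld us\n",
		       i - 20, prio_ratios[i], offset_us);
	}
	return 0;
}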
-@@ -6549,7 +6657,9 @@ void __init sched_init(void)
+@@ -6000,7 +6113,9 @@ void __init sched_init(void)
rq = cpu_rq(i);
rq->user_pc = rq->nice_pc = rq->softirq_pc = rq->system_pc =
rq->iowait_pc = rq->idle_pc = 0;
rq->sd = NULL;
rq->rd = NULL;
rq->online = 0;
-Index: linux-2.6.35-bfs/kernel/sysctl.c
+Index: kernel-2.6.28/kernel/sysctl.c
===================================================================
---- linux-2.6.35-bfs.orig/kernel/sysctl.c 2010-09-25 08:18:30.147361076 +1000
-+++ linux-2.6.35-bfs/kernel/sysctl.c 2010-09-25 08:20:25.823886848 +1000
-@@ -119,7 +119,7 @@ static int __maybe_unused one_hundred =
+--- kernel-2.6.28.orig/kernel/sysctl.c
++++ kernel-2.6.28/kernel/sysctl.c
+@@ -102,7 +102,7 @@ static int __read_mostly one_hundred = 1
#ifdef CONFIG_SCHED_BFS
extern int rr_interval;
extern int sched_iso_cpu;
-static int __read_mostly five_thousand = 5000;
+static int __read_mostly one_thousand = 1000;
#endif
- #ifdef CONFIG_PRINTK
- static int ten_thousand = 10000;
-@@ -794,7 +794,7 @@ static struct ctl_table kern_table[] = {
- .mode = 0644,
+ /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
+ static int maxolduid = 65535;
+@@ -732,7 +732,7 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
.extra1 = &one,
- .extra2 = &five_thousand,
+ .extra2 = &one_thousand,
},
{
- .procname = "iso_cpu",
+ .ctl_name = CTL_UNNUMBERED,
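The sysctl change swaps the old upper bound of 5000 for 1000, so writes to the rr_interval tunable are accepted by proc_dointvec_minmax only within [1, 1000] (extra1 still points at one). Below is a small userspace stand-in for that bounds check; the reject-rather-than-clamp behaviour matches the stock handler, everything else is illustrative.

#include <errno.h>
#include <stdio.h>

static const int one = 1, one_thousand = 1000;
static int rr_interval = 6;	/* illustrative current value */

/* Userspace stand-in for proc_dointvec_minmax with extra1/extra2 bounds. */
static int write_rr_interval(int val)
{
	if (val < one || val > one_thousand)
		return -EINVAL;	/* out-of-range writes are refused, not clamped */
	rr_interval = val;
	return 0;
}

int main(void)
{
	int ret;

	ret = write_rr_interval(300);
	printf("write 300:  ret %d, rr_interval %d\n", ret, rr_interval);

	ret = write_rr_interval(5000);
	printf("write 5000: ret %d, rr_interval %d\n", ret, rr_interval);
	return 0;
}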