+++ /dev/null
-- Major overhaul of queued changes.
-
-- Microoptimise multiplications/divisions to be shifts where suitable.
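-
-For illustration (a standalone sketch, not the patched kernel code): the new
-conversion macros below rely on a 10-bit shift standing in for a factor of
-1000 (1024, ~2.4% off) and a 20-bit shift for a factor of one million
-(1048576, ~4.9% off), close enough for quantities that are approximate
-anyway:
-
-    #include <stdint.h>
-    #include <stdio.h>
-
-    /* Same idea as the MS_TO_NS/NS_TO_US macros in the diff below. */
-    static inline uint64_t ms_to_ns_approx(uint64_t ms) { return ms << 20; }
-    static inline uint64_t ns_to_us_approx(uint64_t ns) { return ns >> 10; }
-
-    int main(void)
-    {
-        /* 6ms -> 6291456ns where the exact value is 6000000ns */
-        printf("%llu\n", (unsigned long long)ms_to_ns_approx(6));
-        /* 6291456ns -> 6144us where the exact value is 6291us */
-        printf("%llu\n", (unsigned long long)ns_to_us_approx(6291456));
-        return 0;
-    }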
-
-- Change ISO calculations to have their own lock so that the scheduler tick
-does not need to grab the grq lock.
-
-- Change all deadline accounting to use nanosecond values.
-
-- Introduce local niffies variable which is updated from any runqueue using
-the TSC clock whenever the grq lock is taken, and compare deadlines against
-niffies. This gives much more granular deadlines when jiffies are low
-resolution, such as 100Hz, and tasks will now rarely have identical deadlines.
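-
-For illustration, assuming the default rr_interval of 6: the nice-0 deadline
-offset works out to 128 * 6 * 8192ns, roughly 6.3ms, less than a single 10ms
-jiffy at 100Hz. With jiffy-resolution deadlines, two nice-0 tasks waking in
-the same tick would therefore get identical deadlines; with nanosecond
-niffies they almost never will.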
-
-- Drop the "skip_clock_update" concept as we update the niffies each time we
-update the rq clocks, thus we want to update it more often.
-
-- Rework try_preempt.
-
-- Bypass rechecking for the earliest deadline task in schedule() when we know
-that prev will run again.
-
-- When prev is being descheduled, check whether it can run on an idle CPU, as
-may happen when it is displaced by a task that must use this CPU for affinity
-reasons.
-
-- Decrease the maximum possible rr_interval to 1000 (one second) as there is
-no demonstrable advantage to higher values, and they may overflow with other
-changes being introduced.
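-
-Rough figures: the nice-19 ratio is 128 multiplied by 1.1 thirty-nine times,
-about 5100, so the nanosecond deadline offset becomes roughly
-5100 * rr_interval * 8192. At rr_interval = 1000 that is already about
-4 * 10^10ns (some 40 seconds), beyond the range of a 32-bit value, so larger
-intervals only increase the overflow risk for no scheduling benefit.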
-
-- Check whether tasks are scheduled within the 1st or 2nd half of a tick and
-use this to decide how far into its last tick a task can run. This makes the
-greatest difference at lower Hz values with small rr intervals.
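-
-Worked example at 100Hz (HALF_JIFFY_US = 5000): ticks are the only points
-where timeslices are accounted, so a task reaching a tick with 3ms of slice
-left would expire mid-tick and overrun until the following tick. If it was
-scheduled in the first half of a tick (rq->dither set), its slice is zeroed
-at the tick instead so it reschedules on time; with more than 5ms remaining
-it is left to run.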
-
-- Change the test for an exhausted time slice to 100us, as a task rescheduled
-with less time remaining than this will either greatly overrun its quota or
-be rescheduled again almost immediately.
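-
-For scale, with the default 6ms rr_interval: 100us is under 2% of a full
-timeslice. Letting a task keep, say, 40us of slice means either overrunning
-until the next tick (up to 10ms at 100Hz) or paying a full context switch
-for only a few tens of microseconds of runtime.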
-
-- Change SCHED_BATCH tasks to refill timeslices and reset deadline every time
-they're descheduled as they've been flagged as latency insensitive, likely
-fully CPU bound tasks. This should decrease the impact running batch tasks
-has on other tasks.
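-
-Illustration, again with the 6ms default: a nice-0 SCHED_BATCH task
-descheduled with 3ms of slice left now gets a fresh 6ms slice, but its
-deadline is also pushed ~6.3ms past the current niffies, so it queues behind
-SCHED_NORMAL tasks that woke earlier rather than competing with a stale,
-earlier deadline.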
-
-- Microoptimise before context switch in schedule()
-
-- Add a local last_task variable to each runqueue which keeps a copy of the
-last non-idle task that ran on this CPU. Use this value to determine that a
-task is still cache warm on this CPU even if it has run elsewhere in the
-meantime. This improves throughput on relatively idle systems with >2 logical
-CPUs.
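-
-For illustration: a task that last ran on CPU0, was briefly placed on CPU1
-by an idle or affinity decision, and then wakes again will still be pointed
-to by CPU0's last_task provided nothing else has run there since, so
-cache_distance() treats CPU0 as cache warm and the task migrates back rather
-than bouncing. The pointer is only ever compared, never dereferenced, so a
-stale last_task is harmless.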
-
-- Remove the first_time_slice concept as it wasn't contributing anything
-meaningful but was adding overhead, especially in sched_exit.
-
-- When a task forks and gives up half its timeslice to its child, check
-whether the remainder leaves it due to be descheduled, and ensure that the
-child inherits the proper variables from its parent across fork.
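-
-Worked example with a 6ms slice: a parent with 4ms left is over the
-2 * RESCHED_US (200us) threshold, so it keeps 2ms and the child receives
-2ms. A parent down to 150us has nothing worth donating, so it is marked to
-reschedule while the child starts with a full slice and a fresh deadline,
-slightly earlier than the one the parent will get, letting the child run
-first.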
-
-- Fix try_preempt which may have been picking a higher priority runqueue on
-SMP if it had a later deadline. Remove the test for equal deadline as
-nanosecond deadline resolution means this will never happen.
-
-- Random other minor cleanups.
-
--ck
----
-
- include/linux/sched.h | 4
- kernel/sched_bfs.c | 466 +++++++++++++++++++++-------------
- kernel/sysctl.c | 4
- 3 files changed, 293 insertions(+), 183 deletions(-)
-
-Index: kernel-2.6.28/include/linux/sched.h
-===================================================================
---- kernel-2.6.28.orig/include/linux/sched.h
-+++ kernel-2.6.28/include/linux/sched.h
-@@ -1121,8 +1121,8 @@ struct task_struct {
- int prio, static_prio, normal_prio;
- unsigned int rt_priority;
- #ifdef CONFIG_SCHED_BFS
-- int time_slice, first_time_slice;
-- unsigned long deadline;
-+ int time_slice;
-+ u64 deadline;
- struct list_head run_list;
- u64 last_ran;
- u64 sched_time; /* sched_clock time spent running */
-@@ -1426,7 +1426,7 @@ static inline void tsk_cpus_current(stru
-
- static inline void print_scheduler_version(void)
- {
-- printk(KERN_INFO"BFS CPU scheduler v0.330 by Con Kolivas ported by ToAsTcfh.\n");
-+ printk(KERN_INFO"BFS CPU scheduler v0.350 by Con Kolivas ported by ToAsTcfh.\n");
- }
-
- static inline int iso_task(struct task_struct *p)
-Index: kernel-2.6.28/kernel/sched_bfs.c
-===================================================================
---- kernel-2.6.28.orig/kernel/sched_bfs.c
-+++ kernel-2.6.28/kernel/sched_bfs.c
-@@ -102,10 +102,19 @@
- #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
- #define SCHED_PRIO(p) ((p)+MAX_RT_PRIO)
-
--/* Some helpers for converting to/from various scales.*/
-+/*
-+ * Some helpers for converting to/from various scales. Use shifts to
-+ * approximate powers of ten with less overhead.
-+ */
- #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
--#define MS_TO_NS(TIME) ((TIME) * 1000000)
--#define MS_TO_US(TIME) ((TIME) * 1000)
-+#define HALF_JIFFY_NS (1000000000 / HZ / 2)
-+#define HALF_JIFFY_US (1000000 / HZ / 2)
-+#define MS_TO_NS(TIME) ((TIME) << 20)
-+#define MS_TO_US(TIME) ((TIME) << 10)
-+#define NS_TO_MS(TIME) ((TIME) >> 20)
-+#define NS_TO_US(TIME) ((TIME) >> 10)
-+
-+#define RESCHED_US (100) /* Reschedule if less than this many us left */
-
- #ifdef CONFIG_SMP
- /*
-@@ -157,8 +166,9 @@ static inline unsigned long timeslice(vo
- }
-
- /*
-- * The global runqueue data that all CPUs work off. All data is protected
-- * by grq.lock.
-+ * The global runqueue data that all CPUs work off. Data is protected either
-+ * by the global grq lock, or the discrete lock that precedes the data in this
-+ * struct.
- */
- struct global_rq {
- spinlock_t lock;
-@@ -167,17 +177,17 @@ struct global_rq {
- unsigned long long nr_switches;
- struct list_head queue[PRIO_LIMIT];
- DECLARE_BITMAP(prio_bitmap, PRIO_LIMIT + 1);
-- int iso_ticks;
-- int iso_refractory;
- #ifdef CONFIG_SMP
- unsigned long qnr; /* queued not running */
- cpumask_t cpu_idle_map;
- int idle_cpus;
- #endif
--#if BITS_PER_LONG < 64
-- unsigned long jiffies;
-- u64 jiffies_64;
--#endif
-+ /* Nanosecond jiffies */
-+ u64 niffies;
-+
-+ spinlock_t iso_lock;
-+ int iso_ticks;
-+ int iso_refractory;
- };
-
- /* There can be only one */
-@@ -192,8 +202,8 @@ struct rq {
- #ifdef CONFIG_NO_HZ
- unsigned char in_nohz_recently;
- #endif
-+ struct task_struct *last_task;
- #endif
-- unsigned int skip_clock_update;
-
- struct task_struct *curr, *idle;
- struct mm_struct *prev_mm;
-@@ -229,9 +239,11 @@ struct rq {
- /* See if all cache siblings are idle */
- cpumask_t cache_siblings;
- #endif
-+ u64 last_niffy; /* Last time this RQ updated grq.niffies */
- #endif
-+ u64 clock, old_clock, last_tick;
-+ int dither;
-
-- u64 clock;
- #ifdef CONFIG_SCHEDSTATS
-
- /* latency stats */
-@@ -290,14 +302,6 @@ struct root_domain {
- static struct root_domain def_root_domain;
- #endif
-
--static inline int cpu_of(struct rq *rq)
--{
--#ifdef CONFIG_SMP
-- return rq->cpu;
--#else
-- return 0;
--#endif
--}
-
- /*
- * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
-@@ -309,17 +313,65 @@ static inline int cpu_of(struct rq *rq)
- #define for_each_domain(cpu, __sd) \
- for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
-
-+static inline void update_rq_clock(struct rq *rq);
-+
- #ifdef CONFIG_SMP
- #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
- #define this_rq() (&__get_cpu_var(runqueues))
- #define task_rq(p) cpu_rq(task_cpu(p))
- #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-+static inline int cpu_of(struct rq *rq)
-+{
-+ return rq->cpu;
-+}
-+
-+/*
-+ * Niffies are a globally increasing nanosecond counter. Whenever a runqueue
-+ * clock is updated with the grq.lock held, it is an opportunity to update the
-+ * niffies value. Any CPU can update it by adding how much its clock has
-+ * increased since it last updated niffies, minus whatever other CPUs have
-+ * added to niffies in the meantime.
-+ */
-+static inline void update_clocks(struct rq *rq)
-+{
-+ s64 ndiff;
-+
-+ update_rq_clock(rq);
-+ ndiff = rq->clock - rq->old_clock;
-+ /* old_clock is only updated when we are updating niffies */
-+ rq->old_clock = rq->clock;
-+ ndiff -= grq.niffies - rq->last_niffy;
-+ /*
-+ * Sanity check should sched_clock return bogus values or be limited to
-+ * just jiffy resolution. Some time will always have passed.
-+ */
-+ if (unlikely(ndiff < 1 || ndiff > MS_TO_NS(rr_interval)))
-+ ndiff = 1;
-+ grq.niffies += ndiff;
-+ rq->last_niffy = grq.niffies;
-+}
- #else /* CONFIG_SMP */
- static struct rq *uprq;
- #define cpu_rq(cpu) (uprq)
- #define this_rq() (uprq)
- #define task_rq(p) (uprq)
- #define cpu_curr(cpu) ((uprq)->curr)
-+static inline int cpu_of(struct rq *rq)
-+{
-+ return 0;
-+}
-+
-+static inline void update_clocks(struct rq *rq)
-+{
-+ s64 ndiff;
-+
-+ update_rq_clock(rq);
-+ ndiff = rq->clock - rq->old_clock;
-+ rq->old_clock = rq->clock;
-+ if (unlikely(ndiff < 1 || ndiff > MS_TO_NS(rr_interval)))
-+ ndiff = 1;
-+ grq.niffies += ndiff;
-+}
- #endif
-
- #include "sched_stats.h"
-@@ -333,13 +385,13 @@ static struct rq *uprq;
-
- /*
- * All common locking functions performed on grq.lock. rq->clock is local to
-- * the cpu accessing it so it can be modified just with interrupts disabled,
-- * but looking up task_rq must be done under grq.lock to be safe.
-+ * the CPU accessing it so it can be modified just with interrupts disabled
-+ * when we're not updating niffies.
-+ * Looking up task_rq must be done under grq.lock to be safe.
- */
- static inline void update_rq_clock(struct rq *rq)
- {
-- if (!rq->skip_clock_update)
-- rq->clock = sched_clock_cpu(cpu_of(rq));
-+ rq->clock = sched_clock_cpu(cpu_of(rq));
- }
-
- static inline int task_running(struct task_struct *p)
-@@ -368,8 +420,8 @@ static inline void grq_lock_irq(void)
- static inline void time_lock_grq(struct rq *rq)
- __acquires(grq.lock)
- {
-- update_rq_clock(rq);
- grq_lock();
-+ update_clocks(rq);
- }
-
- static inline void grq_unlock_irq(void)
-@@ -403,7 +455,7 @@ static inline struct rq
- __acquires(grq.lock)
- {
- struct rq *rq = task_grq_lock(p, flags);
-- update_rq_clock(rq);
-+ update_clocks(rq);
- return rq;
- }
-
-@@ -418,7 +470,7 @@ static inline void time_task_grq_lock_ir
- __acquires(grq.lock)
- {
- struct rq *rq = task_grq_lock_irq(p);
-- update_rq_clock(rq);
-+ update_clocks(rq);
- }
-
- static inline void task_grq_unlock_irq(void)
-@@ -513,33 +565,6 @@ static inline void finish_lock_switch(st
- }
- #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
-
--/*
-- * In order to have a monotonic clock that does not wrap we have a 64 bit
-- * unsigned long that's protected by grq.lock used in place of jiffies on
-- * 32 bit builds.
-- */
--#if BITS_PER_LONG < 64
--static inline void update_gjiffies(void)
--{
-- if (grq.jiffies != jiffies) {
-- grq_lock();
-- grq.jiffies = jiffies;
-- grq.jiffies_64++;
-- grq_unlock();
-- }
--}
--
--#define gjiffies (grq.jiffies_64)
--
--#else /* BITS_PER_LONG < 64 */
--static inline void update_gjiffies(void)
--{
--}
--
--#define gjiffies jiffies
--
--#endif /* BITS_PER_LONG < 64 */
--
- static inline int deadline_before(u64 deadline, u64 time)
- {
- return (deadline < time);
-@@ -572,17 +597,6 @@ static void dequeue_task(struct task_str
- }
-
- /*
-- * When a task is freshly forked, the first_time_slice flag is set to say
-- * it has taken time_slice from its parent and if it exits on this first
-- * time_slice it can return its time_slice back to the parent.
-- */
--static inline void reset_first_time_slice(struct task_struct *p)
--{
-- if (unlikely(p->first_time_slice))
-- p->first_time_slice = 0;
--}
--
--/*
- * To determine if it's safe for a task of SCHED_IDLEPRIO to actually run as
- * an idle task, we ensure none of the following conditions are met.
- */
-@@ -644,11 +658,11 @@ static inline int task_prio_ratio(struct
- /*
- * task_timeslice - all tasks of all priorities get the exact same timeslice
- * length. CPU distribution is handled by giving different deadlines to
-- * tasks of different priorities.
-+ * tasks of different priorities. Use 128 as the base value for fast shifts.
- */
- static inline int task_timeslice(struct task_struct *p)
- {
-- return (rr_interval * task_prio_ratio(p) / 100);
-+ return (rr_interval * task_prio_ratio(p) / 128);
- }
-
- #ifdef CONFIG_SMP
-@@ -700,6 +714,15 @@ static int suitable_idle_cpus(struct tas
-
- static void resched_task(struct task_struct *p);
-
-+/*
-+ * last_task stores the last non-idle task scheduled on the local rq for
-+ * cache warmth testing.
-+ */
-+static inline void set_last_task(struct rq *rq, struct task_struct *p)
-+{
-+ rq->last_task = p;
-+}
-+
- #define CPUIDLE_CACHE_BUSY (1)
- #define CPUIDLE_DIFF_CPU (2)
- #define CPUIDLE_THREAD_BUSY (4)
-@@ -722,6 +745,9 @@ static void resched_task(struct task_str
- * Other node, other CPU, idle cache, idle threads.
- * Other node, other CPU, busy cache, idle threads.
- * Other node, other CPU, busy threads.
-+ *
-+ * If p was the last task running on this rq, then regardless of where
-+ * it has been running since then, it is cache warm on this rq.
- */
- static void resched_best_idle(struct task_struct *p)
- {
-@@ -754,11 +780,14 @@ static void resched_best_idle(struct tas
- tmp_rq = cpu_rq(cpu_tmp);
-
- if (rq->cpu_locality[cpu_tmp]) {
-+ /* last_task may be stale; it is only compared, never dereferenced */
-+ if (rq->last_task && p != rq->last_task) {
- #ifdef CONFIG_NUMA
-- if (rq->cpu_locality[cpu_tmp] > 1)
-- ranking |= CPUIDLE_DIFF_NODE;
-+ if (rq->cpu_locality[cpu_tmp] > 1)
-+ ranking |= CPUIDLE_DIFF_NODE;
- #endif
-- ranking |= CPUIDLE_DIFF_CPU;
-+ ranking |= CPUIDLE_DIFF_CPU;
-+ }
- }
- #ifdef CONFIG_SCHED_MC
- if (!(tmp_rq->cache_idle(cpu_tmp)))
-@@ -800,6 +829,11 @@ static inline void resched_suitable_idle
- static inline int
- cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
- {
-+ /* last_task may be stale; it is only compared, never dereferenced */
-+ if (likely(rq->last_task)) {
-+ if (rq->last_task == p)
-+ return 0;
-+ }
- return rq->cpu_locality[cpu_of(task_rq)] * task_timeslice(p);
- }
- #else /* CONFIG_SMP */
-@@ -839,6 +873,10 @@ cache_distance(struct rq *task_rq, struc
- {
- return 0;
- }
-+
-+static inline void set_last_task(struct rq *rq, struct task_struct *p)
-+{
-+}
- #endif /* CONFIG_SMP */
-
- /*
-@@ -886,7 +924,7 @@ static int effective_prio(struct task_st
- */
- static void activate_task(struct task_struct *p, struct rq *rq)
- {
-- update_rq_clock(rq);
-+ update_clocks(rq);
-
- /*
- * Sleep time is in units of nanosecs, so shift by 20 to get a
-@@ -1157,8 +1195,28 @@ void kick_process(struct task_struct *p)
- #endif
-
- #define rq_idle(rq) ((rq)->rq_prio == PRIO_LIMIT)
--#define task_idle(p) ((p)->prio == PRIO_LIMIT)
-
-+/*
-+ * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the
-+ * basis of earlier deadlines. SCHED_IDLEPRIO tasks don't preempt anything
-+ * else or each other; they cooperatively multitask. An idle rq scores as
-+ * prio PRIO_LIMIT so it is always preempted.
-+ */
-+static inline int
-+can_preempt(struct task_struct *p, int prio, unsigned long deadline,
-+ unsigned int policy)
-+{
-+ /* Better static priority RT task or better policy preemption */
-+ if (p->prio < prio)
-+ return 1;
-+ if (p->prio > prio)
-+ return 0;
-+ /* SCHED_NORMAL, BATCH and ISO will preempt based on deadline */
-+ if (!deadline_before(p->deadline, deadline))
-+ return 0;
-+ return 1;
-+}
-+#ifdef CONFIG_SMP
- #ifdef CONFIG_HOTPLUG_CPU
- /*
- * Check to see if there is a task that is affined only to offline CPUs but
-@@ -1179,14 +1237,20 @@ static inline int online_cpus(struct tas
-
-
- /*
-- * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the
-- * basis of earlier deadlines. SCHED_BATCH, ISO and IDLEPRIO don't preempt
-- * between themselves, they cooperatively multitask. An idle rq scores as
-- * prio PRIO_LIMIT so it is always preempted. latest_deadline and
-- * highest_prio_rq are initialised only to silence the compiler. When
-- * all else is equal, still prefer this_rq.
-+ * Check to see if p can run on cpu, and if not, whether there are any online
-+ * CPUs it can run on instead.
-+ */
-+static inline int needs_other_cpu(struct task_struct *p, int cpu)
-+{
-+ if (unlikely(!cpu_isset(cpu, p->cpus_allowed) && online_cpus(p)))
-+ return 1;
-+ return 0;
-+}
-+
-+/*
-+ * latest_deadline and highest_prio_rq are initialised only to silence the
-+ * compiler. When all else is equal, still prefer this_rq.
- */
--#ifdef CONFIG_SMP
- static void try_preempt(struct task_struct *p, struct rq *this_rq)
- {
- struct rq *highest_prio_rq = this_rq;
-@@ -1194,6 +1258,10 @@ static void try_preempt(struct task_stru
- int highest_prio;
- cpumask_t tmp;
-
-+ /* IDLEPRIO tasks never preempt anything */
-+ if (p->policy == SCHED_IDLEPRIO)
-+ return;
-+
- if (suitable_idle_cpus(p)) {
- resched_best_idle(p);
- return;
-@@ -1220,30 +1288,32 @@ static void try_preempt(struct task_stru
- offset_deadline = rq->rq_deadline -
- cache_distance(this_rq, rq, p);
-
-- if (rq_prio > highest_prio ||
-- (deadline_after(offset_deadline, latest_deadline) ||
-- (offset_deadline == latest_deadline && this_rq == rq))) {
-+ if (rq_prio > highest_prio || (rq_prio == highest_prio &&
-+ deadline_after(offset_deadline, latest_deadline))) {
- latest_deadline = offset_deadline;
- highest_prio = rq_prio;
- highest_prio_rq = rq;
- }
- }
-
-- if (p->prio > highest_prio || (p->prio == highest_prio &&
-- p->policy == SCHED_NORMAL &&
-- !deadline_before(p->deadline, latest_deadline)))
-+ if (!can_preempt(p, highest_prio, highest_prio_rq->rq_deadline,
-+ highest_prio_rq->rq_policy))
- return;
-
-- /* p gets to preempt highest_prio_rq->curr */
- resched_task(highest_prio_rq->curr);
-- highest_prio_rq->skip_clock_update = 1;
- }
- #else /* CONFIG_SMP */
-+static inline int needs_other_cpu(struct task_struct *p, int cpu)
-+{
-+ return 0;
-+}
-+
- static void try_preempt(struct task_struct *p, struct rq *this_rq)
- {
-- if (p->prio < uprq->rq_prio ||
-- (p->prio == uprq->rq_prio && p->policy == SCHED_NORMAL &&
-- deadline_before(p->deadline, uprq->rq_deadline)))
-+ if (p->policy == SCHED_IDLEPRIO)
-+ return;
-+ if (can_preempt(p, uprq->rq_prio, uprq->rq_deadline,
-+ uprq->rq_policy))
- resched_task(uprq->curr);
- }
- #endif /* CONFIG_SMP */
-@@ -1331,12 +1401,15 @@ int wake_up_state(struct task_struct *p,
- return try_to_wake_up(p, state, 0);
- }
-
-+static void time_slice_expired(struct task_struct *p);
-+
- /*
- * Perform scheduler related setup for a newly forked process p.
- * p is forked by current.
- */
- void sched_fork(struct task_struct *p, int clone_flags)
- {
-+ struct task_struct *curr;
- int cpu = get_cpu();
- struct rq *rq;
-
-@@ -1376,10 +1449,11 @@ void sched_fork(struct task_struct *p, i
- p->sched_reset_on_fork = 0;
- }
-
-+ curr = current;
- /*
- * Make sure we do not leak PI boosting priority to the child:
- */
-- p->prio = current->normal_prio;
-+ p->prio = curr->normal_prio;
-
- INIT_LIST_HEAD(&p->run_list);
- #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
-@@ -1400,18 +1474,26 @@ void sched_fork(struct task_struct *p, i
- * total amount of pending timeslices in the system doesn't change,
- * resulting in more scheduling fairness. If it's negative, it won't
- * matter since that's the same as being 0. current's time_slice is
-- * actually in rq_time_slice when it's running.
-+ * actually in rq_time_slice when it's running, as is its last_ran
-+ * value. rq->rq_deadline is only modified within schedule() so it
-+ * is always equal to current->deadline.
- */
-- rq = task_grq_lock_irq(current);
-- if (likely(rq->rq_time_slice > 0)) {
-+ rq = task_grq_lock_irq(curr);
-+ if (likely(rq->rq_time_slice >= RESCHED_US * 2)) {
- rq->rq_time_slice /= 2;
-+ p->time_slice = rq->rq_time_slice;
-+ } else {
- /*
-- * The remainder of the first timeslice might be recovered by
-- * the parent if the child exits early enough.
-+ * Forking task has run out of timeslice. Reschedule it and
-+ * start its child with a new time slice and deadline. The
-+ * child will end up running first because its deadline will
-+ * be slightly earlier.
- */
-- p->first_time_slice = 1;
-+ rq->rq_time_slice = 0;
-+ set_tsk_need_resched(curr);
-+ time_slice_expired(p);
- }
-- p->time_slice = rq->rq_time_slice;
-+ p->last_ran = rq->rq_last_ran;
- task_grq_unlock_irq();
- out:
- put_cpu();
-@@ -1452,40 +1534,9 @@ void wake_up_new_task(struct task_struct
- task_grq_unlock(&flags);
- }
-
--/*
-- * Potentially available exiting-child timeslices are
-- * retrieved here - this way the parent does not get
-- * penalised for creating too many threads.
-- *
-- * (this cannot be used to 'generate' timeslices
-- * artificially, because any timeslice recovered here
-- * was given away by the parent in the first place.)
-- */
-+/* Nothing to do here */
- void sched_exit(struct task_struct *p)
- {
-- struct task_struct *parent;
-- unsigned long flags;
-- struct rq *rq;
--
-- if (unlikely(p->first_time_slice)) {
-- int *par_tslice, *p_tslice;
--
-- parent = p->parent;
-- par_tslice = &parent->time_slice;
-- p_tslice = &p->time_slice;
--
-- rq = task_grq_lock(parent, &flags);
-- /* The real time_slice of the "curr" task is on the rq var.*/
-- if (p == rq->curr)
-- p_tslice = &rq->rq_time_slice;
-- else if (parent == task_rq(parent)->curr)
-- par_tslice = &rq->rq_time_slice;
--
-- *par_tslice += *p_tslice;
-- if (unlikely(*par_tslice > timeslice()))
-- *par_tslice = timeslice();
-- task_grq_unlock(&flags);
-- }
- }
-
- #ifdef CONFIG_PREEMPT_NOTIFIERS
-@@ -1900,7 +1951,7 @@ update_cpu_clock(struct rq *rq, struct t
- else if (unlikely(time_diff > JIFFIES_TO_NS(1)))
- time_diff = JIFFIES_TO_NS(1);
-
-- rq->rq_time_slice -= time_diff / 1000;
-+ rq->rq_time_slice -= NS_TO_US(time_diff);
- }
- rq->rq_last_ran = rq->timekeep_clock = rq->clock;
- }
-@@ -1916,7 +1967,7 @@ static u64 do_task_delta_exec(struct tas
- u64 ns = 0;
-
- if (p == rq->curr) {
-- update_rq_clock(rq);
-+ update_clocks(rq);
- ns = rq->clock - rq->rq_last_ran;
- if (unlikely((s64)ns < 0))
- ns = 0;
-@@ -2090,10 +2141,22 @@ void account_idle_ticks(unsigned long ti
- }
- #endif
-
-+static inline void grq_iso_lock(void)
-+ __acquires(grq.iso_lock)
-+{
-+ spin_lock(&grq.iso_lock);
-+}
-+
-+static inline void grq_iso_unlock(void)
-+ __releases(grq.iso_lock)
-+{
-+ spin_unlock(&grq.iso_lock);
-+}
-+
- /*
- * Functions to test for when SCHED_ISO tasks have used their allocated
- * quota as real time scheduling and convert them back to SCHED_NORMAL.
-- * Where possible, the data is tested lockless, to avoid grabbing grq_lock
-+ * Where possible, the data is tested lockless, to avoid grabbing iso_lock
- * because the occasional inaccurate result won't matter. However the
- * tick data is only ever modified under lock. iso_refractory is only simply
- * set to 0 or 1 so it's not worth grabbing the lock yet again for that.
-@@ -2128,21 +2191,21 @@ static unsigned int test_ret_isorefracto
-
- static void iso_tick(void)
- {
-- grq_lock();
-+ grq_iso_lock();
- grq.iso_ticks += 100;
-- grq_unlock();
-+ grq_iso_unlock();
- }
-
- /* No SCHED_ISO task was running so decrease rq->iso_ticks */
- static inline void no_iso_tick(void)
- {
- if (grq.iso_ticks) {
-- grq_lock();
-+ grq_iso_lock();
- grq.iso_ticks -= grq.iso_ticks / ISO_PERIOD + 1;
- if (unlikely(grq.iso_refractory && grq.iso_ticks <
- ISO_PERIOD * (sched_iso_cpu * 115 / 128)))
- clear_iso_refractory();
-- grq_unlock();
-+ grq_iso_unlock();
- }
- }
-
-@@ -2181,10 +2244,23 @@ static void task_running_tick(struct rq
- }
-
- /* SCHED_FIFO tasks never run out of timeslice. */
-- if (rq_idle(rq) || rq->rq_time_slice > 0 || rq->rq_policy == SCHED_FIFO)
-+ if (rq->rq_policy == SCHED_FIFO)
- return;
-+ /*
-+ * Tasks that were scheduled in the first half of a tick are not
-+ * allowed to run into the 2nd half of the next tick if they will
-+ * run out of time slice in the interim. Otherwise, if they have
-+ * less than 100us of time slice left they will be rescheduled.
-+ */
-+ if (rq->dither) {
-+ if (rq->rq_time_slice > HALF_JIFFY_US)
-+ return;
-+ else
-+ rq->rq_time_slice = 0;
-+ } else if (rq->rq_time_slice >= RESCHED_US)
-+ return;
-
-- /* p->time_slice <= 0. We only modify task_struct under grq lock */
-+ /* p->time_slice < RESCHED_US. We only modify task_struct under grq lock */
- p = rq->curr;
- requeue_task(p);
- grq_lock();
-@@ -2205,13 +2281,14 @@ void scheduler_tick(void)
- struct rq *rq = cpu_rq(cpu);
-
- sched_clock_tick();
-+ /* grq lock not grabbed, so only update rq clock */
- update_rq_clock(rq);
- update_cpu_clock(rq, rq->curr, 1);
-- update_gjiffies();
- if (!rq_idle(rq))
- task_running_tick(rq);
- else
- no_iso_tick();
-+ rq->last_tick = rq->clock;
- }
-
- #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-@@ -2273,7 +2350,7 @@ EXPORT_SYMBOL(sub_preempt_count);
- #endif
-
- /*
-- * Deadline is "now" in gjiffies + (offset by priority). Setting the deadline
-+ * Deadline is "now" in niffies + (offset by priority). Setting the deadline
- * is the key to everything. It distributes cpu fairly amongst tasks of the
- * same nice value, it proportions cpu according to nice level, it means the
- * task that last woke up the longest ago has the earliest deadline, thus
-@@ -2283,7 +2360,7 @@ EXPORT_SYMBOL(sub_preempt_count);
- */
- static inline int prio_deadline_diff(int user_prio)
- {
-- return (prio_ratios[user_prio] * rr_interval * HZ / (1000 * 100)) ? : 1;
-+ return (prio_ratios[user_prio] * rr_interval * (MS_TO_NS(1) / 128));
- }
-
- static inline int task_deadline_diff(struct task_struct *p)
-@@ -2296,25 +2373,33 @@ static inline int static_deadline_diff(i
- return prio_deadline_diff(USER_PRIO(static_prio));
- }
-
--static inline int longest_deadline_diff(void)
-+static inline int ms_longest_deadline_diff(void)
- {
-- return prio_deadline_diff(39);
-+ return NS_TO_MS(prio_deadline_diff(39));
- }
-
- /*
- * The time_slice is only refilled when it is empty and that is when we set a
- * new deadline.
- */
--static inline void time_slice_expired(struct task_struct *p)
-+static void time_slice_expired(struct task_struct *p)
- {
-- reset_first_time_slice(p);
- p->time_slice = timeslice();
-- p->deadline = gjiffies + task_deadline_diff(p);
-+ p->deadline = grq.niffies + task_deadline_diff(p);
- }
-
-+/*
-+ * Timeslices below RESCHED_US are considered as good as expired, as
-+ * there's no point rescheduling when so little time is left. SCHED_BATCH
-+ * tasks have been flagged as not latency sensitive and are likely to be
-+ * fully CPU bound, so every time they're descheduled they have their
-+ * time_slice refilled. However, they get a new, later deadline each
-+ * time so that they have as little effect as possible on SCHED_NORMAL
-+ * tasks.
-+ */
- static inline void check_deadline(struct task_struct *p)
- {
-- if (p->time_slice <= 0)
-+ if (p->time_slice < RESCHED_US || batch_task(p))
- time_slice_expired(p);
- }
-
-@@ -2352,7 +2437,7 @@ retry:
- queue = grq.queue + idx;
- list_for_each_entry(p, queue, run_list) {
- /* Make sure cpu affinity is ok */
-- if (online_cpus(p) && !cpu_isset(cpu, p->cpus_allowed))
-+ if (needs_other_cpu(p, cpu))
- continue;
- if (idx < MAX_RT_PRIO) {
- /* We found an rt task */
-@@ -2479,12 +2564,14 @@ need_resched_nonpreemptible:
- deactivate = 0;
- schedule_debug(prev);
-
-- local_irq_disable();
-- update_rq_clock(rq);
-+ grq_lock_irq();
-+ update_clocks(rq);
- update_cpu_clock(rq, prev, 0);
-- rq->skip_clock_update = 0;
-+ if (rq->clock - rq->last_tick > HALF_JIFFY_NS)
-+ rq->dither = 0;
-+ else
-+ rq->dither = 1;
-
-- grq_lock();
- clear_tsk_need_resched(prev);
-
- if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-@@ -2500,35 +2587,53 @@ need_resched_nonpreemptible:
- prev->time_slice = rq->rq_time_slice;
- prev->deadline = rq->rq_deadline;
- check_deadline(prev);
-- return_task(prev, deactivate);
-- /* Task changed affinity off this cpu */
-- if (unlikely(!cpus_intersects(prev->cpus_allowed,
-- cpumask_of_cpu(cpu)))) {
-- if (online_cpus(prev))
-+ prev->last_ran = rq->clock;
-+
-+ /* Task changed affinity off this CPU */
-+ if (needs_other_cpu(prev, cpu))
-+ resched_suitable_idle(prev);
-+ else if (!deactivate) {
-+ if (!queued_notrunning()) {
-+ /*
-+ * We now know prev is the only thing that is
-+ * awaiting CPU so we can bypass rechecking for
-+ * the earliest deadline task and just run it
-+ * again.
-+ */
-+ grq_unlock_irq();
-+ goto rerun_prev_unlocked;
-+ } else {
-+ /*
-+ * If prev got kicked off by a task that has to
-+ * run on this CPU for affinity reasons then
-+ * there may be an idle CPU it can go to.
-+ */
- resched_suitable_idle(prev);
- }
-+ }
-+ return_task(prev, deactivate);
- }
-
-- if (likely(queued_notrunning())) {
-- next = earliest_deadline_task(rq, idle);
-- } else {
-+ if (unlikely(!queued_notrunning())) {
-+ /*
-+ * This CPU is now truly idle as opposed to when idle is
-+ * scheduled as a high priority task in its own right.
-+ */
- next = idle;
- schedstat_inc(rq, sched_goidle);
-- }
--
-- prefetch(next);
-- prefetch_stack(next);
--
-- if (task_idle(next))
- set_cpuidle_map(cpu);
-- else
-+ } else {
-+ next = earliest_deadline_task(rq, idle);
-+ prefetch(next);
-+ prefetch_stack(next);
- clear_cpuidle_map(cpu);
--
-- prev->last_ran = rq->clock;
-+ }
-
- if (likely(prev != next)) {
- sched_info_switch(prev, next);
-
-+ if (prev != idle)
-+ set_last_task(rq, prev);
- set_rq_task(rq, next);
- grq.nr_switches++;
- prev->oncpu = 0;
-@@ -2547,10 +2652,15 @@ need_resched_nonpreemptible:
- } else
- grq_unlock_irq();
-
-- if (unlikely(reacquire_kernel_lock(current) < 0))
-+rerun_prev_unlocked:
-+ if (unlikely(reacquire_kernel_lock(current) < 0)) {
-+// prev = rq->curr;
-+// switch_count = &prev->nivcsw;
- goto need_resched_nonpreemptible;
-+ }
-+
- preempt_enable_no_resched();
-- if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
-+ if (need_resched())
- goto need_resched;
- }
- EXPORT_SYMBOL(schedule);
-@@ -3066,8 +3176,9 @@ int task_prio(const struct task_struct *
- if (prio <= 0)
- goto out;
-
-- delta = p->deadline - gjiffies;
-- delta = delta * 40 / longest_deadline_diff();
-+ /* Convert to ms to avoid overflows */
-+ delta = NS_TO_MS(p->deadline - grq.niffies);
-+ delta = delta * 40 / ms_longest_deadline_diff();
- if (delta > 0 && delta <= 80)
- prio += delta;
- if (idleprio_task(p))
-@@ -3266,7 +3377,7 @@ recheck:
- policy = oldpolicy = -1;
- goto recheck;
- }
-- update_rq_clock(rq);
-+ update_clocks(rq);
- p->sched_reset_on_fork = reset_on_fork;
-
- queued = task_queued(p);
-@@ -4444,7 +4556,6 @@ migration_call(struct notifier_block *nf
-
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
-- cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
- idle = rq->idle;
- /* Idle task back to normal (off runqueue, low prio) */
- grq_lock_irq();
-@@ -4453,9 +4564,8 @@ migration_call(struct notifier_block *nf
- __setscheduler(idle, rq, SCHED_NORMAL, 0);
- idle->prio = PRIO_LIMIT;
- set_rq_task(rq, idle);
-- update_rq_clock(rq);
-+ update_clocks(rq);
- grq_unlock_irq();
-- cpuset_unlock();
- break;
-
- case CPU_DYING:
-@@ -5982,12 +6093,14 @@ void __init sched_init(void)
- int i;
- struct rq *rq;
-
-- prio_ratios[0] = 100;
-+ prio_ratios[0] = 128;
- for (i = 1 ; i < PRIO_RANGE ; i++)
- prio_ratios[i] = prio_ratios[i - 1] * 11 / 10;
-
- spin_lock_init(&grq.lock);
- grq.nr_running = grq.nr_uninterruptible = grq.nr_switches = 0;
-+ grq.niffies = 0;
-+ spin_lock_init(&grq.iso_lock);
- grq.iso_ticks = grq.iso_refractory = 0;
- #ifdef CONFIG_SMP
- init_defrootdomain();
-@@ -6000,7 +6113,9 @@ void __init sched_init(void)
- rq = cpu_rq(i);
- rq->user_pc = rq->nice_pc = rq->softirq_pc = rq->system_pc =
- rq->iowait_pc = rq->idle_pc = 0;
-+ rq->dither = 0;
- #ifdef CONFIG_SMP
-+ rq->last_niffy = 0;
- rq->sd = NULL;
- rq->rd = NULL;
- rq->online = 0;
-@@ -6219,10 +6334,6 @@ cputime_t task_stime(struct task_struct
- }
- #endif
-
--void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
--{
--}
--
- inline cputime_t task_gtime(struct task_struct *p)
- {
- return p->gtime;
-Index: kernel-2.6.28/kernel/sysctl.c
-===================================================================
---- kernel-2.6.28.orig/kernel/sysctl.c
-+++ kernel-2.6.28/kernel/sysctl.c
-@@ -102,7 +102,7 @@ static int __read_mostly one_hundred = 1
- #ifdef CONFIG_SCHED_BFS
- extern int rr_interval;
- extern int sched_iso_cpu;
--static int __read_mostly five_thousand = 5000;
-+static int __read_mostly one_thousand = 1000;
- #endif
- /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
- static int maxolduid = 65535;
-@@ -732,7 +732,7 @@ static struct ctl_table kern_table[] = {
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &one,
-- .extra2 = &five_thousand,
-+ .extra2 = &one_thousand,
- },
- {
- .ctl_name = CTL_UNNUMBERED,