Several kernel patches by Con Kolivas to complement BFS, adapted for 2.6.28
author    Peter Hunt <peter_j_hunt@hotmail.com>
Wed, 25 May 2011 21:12:06 +0000 (21:12 +0000)
committer Peter Hunt <peter_j_hunt@hotmail.com>
Wed, 25 May 2011 21:12:06 +0000 (21:12 +0000)
kernel-bfs-2.6.28/debian/patches/cpufreq-bfs_tweaks.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/mm-drop_swap_cache_aggressively.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/mm-enable_swaptoken_only_when_swap_full.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/mm-idleprio_prio-1.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/mm-kswapd_inherit_prio-1.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/mm-make_swappiness_really_mean_it.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/sched-add-above-background-load-function.patch [new file with mode: 0644]
kernel-bfs-2.6.28/debian/patches/series

diff --git a/kernel-bfs-2.6.28/debian/patches/cpufreq-bfs_tweaks.patch b/kernel-bfs-2.6.28/debian/patches/cpufreq-bfs_tweaks.patch
new file mode 100644 (file)
index 0000000..877b6bf
--- /dev/null
@@ -0,0 +1,41 @@
+Because of the way BFS works, it needs to transition up in frequency more
+aggressively and down more conservatively.
+
+-ck
+
+---
+ drivers/cpufreq/cpufreq_ondemand.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+Index: linux-2.6.34-ck1/drivers/cpufreq/cpufreq_ondemand.c
+===================================================================
+--- linux-2.6.34-ck1.orig/drivers/cpufreq/cpufreq_ondemand.c   2010-02-25 21:51:48.000000000 +1100
++++ linux-2.6.34-ck1/drivers/cpufreq/cpufreq_ondemand.c        2010-05-18 12:26:18.124319654 +1000
+@@ -28,10 +28,10 @@
+  * It helps to keep variable names smaller, simpler
+  */
+-#define DEF_FREQUENCY_DOWN_DIFFERENTIAL               (10)
+-#define DEF_FREQUENCY_UP_THRESHOLD            (80)
++#define DEF_FREQUENCY_DOWN_DIFFERENTIAL               (17)
++#define DEF_FREQUENCY_UP_THRESHOLD            (63)
+ #define MICRO_FREQUENCY_DOWN_DIFFERENTIAL     (3)
+-#define MICRO_FREQUENCY_UP_THRESHOLD          (95)
++#define MICRO_FREQUENCY_UP_THRESHOLD          (80)
+ #define MIN_FREQUENCY_UP_THRESHOLD            (11)
+ #define MAX_FREQUENCY_UP_THRESHOLD            (100)
+
+@@ -455,10 +455,10 @@ static void dbs_check_cpu(struct cpu_dbs
+       /*
+        * Every sampling_rate, we check, if current idle time is less
+-       * than 20% (default), then we try to increase frequency
++       * than 37% (default), then we try to increase frequency
+        * Every sampling_rate, we look for a the lowest
+        * frequency which can sustain the load while keeping idle time over
+-       * 30%. If such a frequency exist, we try to decrease to this frequency.
++       * 50%. If such a frequency exist, we try to decrease to this frequency.
+        *
+        * Any frequency increase takes it to the maximum frequency.
+        * Frequency reduction happens at minimum steps of
+
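For context, a minimal userspace model of the decision these thresholds feed into. The real ondemand governor works on per-CPU idle/wall time deltas and cpufreq policy structures; next_freq() and its single load percentage below are simplified stand-ins, with 63/17 mirroring the BFS tweak above.

#include <stdio.h>

#define UP_THRESHOLD       63	/* was 80: ramp up sooner */
#define DOWN_DIFFERENTIAL  17	/* was 10: hold high frequencies longer */

static unsigned int next_freq(unsigned int cur, unsigned int max,
			      unsigned int load /* percent of time busy */)
{
	if (load > UP_THRESHOLD)
		return max;		/* any increase jumps to the maximum */
	if (load < UP_THRESHOLD - DOWN_DIFFERENTIAL)
		/* pick the lowest frequency expected to sustain this load */
		return cur * load / (UP_THRESHOLD - DOWN_DIFFERENTIAL);
	return cur;			/* inside the hysteresis band: stay put */
}

int main(void)
{
	printf("%u\n", next_freq(800000, 1000000, 70));	/* -> 1000000 */
	printf("%u\n", next_freq(800000, 1000000, 30));	/* -> 521739 */
	return 0;
}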
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-drop_swap_cache_aggressively.patch b/kernel-bfs-2.6.28/debian/patches/mm-drop_swap_cache_aggressively.patch
new file mode 100644 (file)
index 0000000..6443346
--- /dev/null
@@ -0,0 +1,65 @@
+While it may be nice to keep a copy of pages on swap once written there, the
+more garbage we leave in the swapspace, the slower any further reads from and
+writes to it become. Just free swapcache whenever we can.
+
+-ck
+
+---
+ include/linux/swap.h |    2 +-
+ mm/memory.c          |    2 +-
+ mm/swapfile.c        |    9 ++++-----
+ mm/vmscan.c          |    2 +-
+ 4 files changed, 7 insertions(+), 8 deletions(-)
+
+Index: linux-2.6.34-ck1/mm/memory.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/memory.c  2010-05-18 12:24:33.852194874 +1000
++++ linux-2.6.34-ck1/mm/memory.c       2010-05-18 12:26:16.646319673 +1000
+@@ -2713,7 +2713,7 @@ static int do_swap_page(struct mm_struct
+       page_add_anon_rmap(page, vma, address);
+       swap_free(entry);
+-      if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
++      if ((vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+               remove_exclusive_swap_page(page);
+       unlock_page(page);
+Index: linux-2.6.34-ck1/mm/swapfile.c
+===================================================================
+@@ -712,8 +712,7 @@ int free_swap_and_cache(swp_entry_t entr
+               one_user = (page_count(page) == 2);
+               /* Only cache user (+us), or swap space full? Free it! */
+               /* Also recheck PageSwapCache after page is locked (above) */
+-              if (PageSwapCache(page) && !PageWriteback(page) &&
+-                                      (one_user || vm_swap_full())) {
++              if (PageSwapCache(page) && !PageWriteback(page)) {
+                       delete_from_swap_cache(page);
+                       SetPageDirty(page);
+               }
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c  2010-05-18 12:26:16.371569589 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c       2010-05-18 12:26:16.647319427 +1000
+@@ -821,7 +821,7 @@ cull_mlocked:
+ activate_locked:
+               /* Not a candidate for swapping, so reclaim swap space. */
+-              if (PageSwapCache(page) && vm_swap_full())
++              if (PageSwapCache(page))
+                       remove_exclusive_swap_page_ref(page);
+               VM_BUG_ON(PageActive(page));
+               SetPageActive(page);
+Index: linux-2.6.34-ck1/include/linux/swap.h
+===================================================================
+--- linux-2.6.34-ck1.orig/include/linux/swap.h 2010-05-18 12:26:16.508569731 +1000
++++ linux-2.6.34-ck1/include/linux/swap.h      2010-05-18 12:26:16.647319427 +1000
+@@ -189,7 +189,7 @@ struct swap_list_t {
+       int next;       /* swapfile to be used next */
+ };
+-/* Swap 50% full? Release swapcache more aggressively.. */
++/* Swap 50% full? */
+ #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
+ /* linux/mm/page_alloc.c */
+
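To make the behavioural change concrete, here is a small standalone model of the freeing condition from the swapfile.c hunk above; the boolean parameters are stand-ins for the page state the kernel actually checks, not real page flags.

#include <stdbool.h>
#include <stdio.h>

static long nr_swap_pages = 900;	/* free swap pages */
static long total_swap_pages = 1000;

static bool vm_swap_full(void)		/* more than half of swap in use? */
{
	return nr_swap_pages * 2 < total_swap_pages;
}

static bool drop_swapcache_mainline(bool swapcache, bool writeback, bool one_user)
{
	return swapcache && !writeback && (one_user || vm_swap_full());
}

static bool drop_swapcache_patched(bool swapcache, bool writeback)
{
	return swapcache && !writeback;	/* free the cached copy eagerly */
}

int main(void)
{
	/* shared swapcache page while swap is mostly empty:
	 * mainline keeps the stale copy, the patch frees it */
	printf("mainline: %d  patched: %d\n",
	       drop_swapcache_mainline(true, false, false),
	       drop_swapcache_patched(true, false));
	return 0;
}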
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-enable_swaptoken_only_when_swap_full.patch b/kernel-bfs-2.6.28/debian/patches/mm-enable_swaptoken_only_when_swap_full.patch
new file mode 100644 (file)
index 0000000..ab73900
--- /dev/null
@@ -0,0 +1,26 @@
+The swap token is only useful in conditions of swap thrash, and actually
+worsens the common case by causing more swapping. Make it only have an effect
+when swap is more than half full.
+
+-ck
+
+---
+ include/linux/swap.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.39-ck1/include/linux/swap.h
+===================================================================
+--- linux-2.6.39-ck1.orig/include/linux/swap.h 2011-05-19 16:29:16.642275387 +1000
++++ linux-2.6.39-ck1/include/linux/swap.h      2011-05-19 19:36:35.917273667 +1000
+@@ -358,9 +358,10 @@ extern struct mm_struct *swap_token_mm;
+ extern void grab_swap_token(struct mm_struct *);
+ extern void __put_swap_token(struct mm_struct *);
++/* Only allow swap token to have effect if swap is full */
+ static inline int has_swap_token(struct mm_struct *mm)
+ {
+-      return (mm == swap_token_mm);
++      return (mm == swap_token_mm && vm_swap_full());
+ }
+ static inline void put_swap_token(struct mm_struct *mm)
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-idleprio_prio-1.patch b/kernel-bfs-2.6.28/debian/patches/mm-idleprio_prio-1.patch
new file mode 100644 (file)
index 0000000..e80dad7
--- /dev/null
@@ -0,0 +1,38 @@
+Set the effective priority of idleprio tasks to that of nice 19 tasks when
+modifying vm reclaim behaviour.
+
+Signed-off-by: Con Kolivas <kernel@kolivas.org>
+
+ include/linux/sched.h |    2 +-
+ mm/vmscan.c           |    2 ++
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c  2010-05-18 12:26:16.942194964 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c       2010-05-18 12:26:17.090444482 +1000
+@@ -1711,6 +1711,8 @@ static inline int effective_sc_prio(stru
+       if (likely(p->mm)) {
+               if (rt_task(p))
+                       return -20;
++              if (p->policy == SCHED_IDLEPRIO)
++                      return 19;
+               return task_nice(p);
+       }
+       return 0;
+Index: linux-2.6.34-ck1/include/linux/sched.h
+===================================================================
+--- linux-2.6.34-ck1.orig/include/linux/sched.h        2010-05-18 12:26:16.086194917 +1000
++++ linux-2.6.34-ck1/include/linux/sched.h     2010-05-18 12:26:17.091445870 +1000
+@@ -38,9 +38,9 @@
+ #define SCHED_BATCH           3
+ /* SCHED_ISO: Implemented on BFS only */
+ #define SCHED_IDLE            5
++#define SCHED_IDLEPRIO                SCHED_IDLE
+ #ifdef CONFIG_SCHED_BFS
+ #define SCHED_ISO             4
+-#define SCHED_IDLEPRIO                SCHED_IDLE
+ #define SCHED_MAX             (SCHED_IDLEPRIO)
+ #define SCHED_RANGE(policy)   ((policy) <= SCHED_MAX)
+ #endif
+
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-kswapd_inherit_prio-1.patch b/kernel-bfs-2.6.28/debian/patches/mm-kswapd_inherit_prio-1.patch
new file mode 100644 (file)
index 0000000..0467603
--- /dev/null
@@ -0,0 +1,75 @@
+When kswapd is awoken for reclaim by a running task, set the priority of
+kswapd to that of the calling task, so that the cpu activity of memory
+reclaim reflects the caller's nice level.
+
+Signed-off-by: Con Kolivas <kernel@kolivas.org>
+
+ mm/vmscan.c |   33 ++++++++++++++++++++++++++++++++-
+ 1 file changed, 32 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c  2010-05-18 12:26:16.647319427 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c       2010-05-18 12:26:16.805569620 +1000
+@@ -1697,6 +1697,33 @@ static void shrink_zone(int priority, st
+ }
+ /*
++ * Helper functions to adjust nice level of kswapd, based on the priority of
++ * the task (p) that called it. If it is already higher priority we do not
++ * demote its nice level since it is still working on behalf of a higher
++ * priority task. With kernel threads we leave it at nice 0.
++ *
++ * We don't ever run kswapd real time, so if a real time task calls kswapd we
++ * set it to highest SCHED_NORMAL priority.
++ */
++static inline int effective_sc_prio(struct task_struct *p)
++{
++      if (likely(p->mm)) {
++              if (rt_task(p))
++                      return -20;
++              return task_nice(p);
++      }
++      return 0;
++}
++
++static void set_kswapd_nice(struct task_struct *kswapd, int active)
++{
++      long nice = effective_sc_prio(current);
++
++      if (task_nice(kswapd) > nice || !active)
++              set_user_nice(kswapd, nice);
++}
++
++/*
+  * This is the direct reclaim path, for page-allocating processes.  We only
+  * try to reclaim pages from zones which will satisfy the caller's allocation
+  * request.
+@@ -2294,6 +2321,7 @@ static int kswapd(void *p)
+                               }
+                       }
++                      set_user_nice(tsk, 0);
+                       order = pgdat->kswapd_max_order;
+               }
+               finish_wait(&pgdat->kswapd_wait, &wait);
+@@ -2318,6 +2346,7 @@ static int kswapd(void *p)
+ void wakeup_kswapd(struct zone *zone, int order)
+ {
+       pg_data_t *pgdat;
++      int active;
+       if (!populated_zone(zone))
+               return;
+@@ -2329,7 +2358,9 @@ void wakeup_kswapd(struct zone *zone, in
+               pgdat->kswapd_max_order = order;
+       if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+               return;
+-      if (!waitqueue_active(&pgdat->kswapd_wait))
++      active = waitqueue_active(&pgdat->kswapd_wait);
++      set_kswapd_nice(pgdat->kswapd, active);
++      if (!active)
+               return;
+       wake_up_interruptible(&pgdat->kswapd_wait);
+ }
+
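A standalone model of the nice-inheritance rule the patch adds, following the same effective_sc_prio() policy; the task struct and the waker/active bookkeeping here are stand-ins for the kernel's, kept only to show when kswapd's nice actually changes.

#include <stdio.h>

struct task {
	int nice;	/* -20 (highest) .. 19 (lowest) */
	int has_mm;	/* 0 for kernel threads */
	int rt;		/* real-time policy? */
};

static int effective_sc_prio(const struct task *p)
{
	if (p->has_mm) {
		if (p->rt)
			return -20;	/* rt waker: best SCHED_NORMAL nice */
		return p->nice;
	}
	return 0;			/* kernel threads leave kswapd at nice 0 */
}

static void set_kswapd_nice(struct task *kswapd, const struct task *waker, int active)
{
	int nice = effective_sc_prio(waker);

	/* adopt the waker's nice if that boosts kswapd (its current nice is
	 * numerically higher), or unconditionally while kswapd is idle */
	if (kswapd->nice > nice || !active)
		kswapd->nice = nice;
}

int main(void)
{
	struct task kswapd = { 0, 0, 0 };
	struct task batch  = { 19, 1, 0 };	/* nice-19 userspace task */
	struct task rt     = { -5, 1, 1 };	/* real-time task */

	set_kswapd_nice(&kswapd, &batch, 0);	/* kswapd idle: inherit nice 19 */
	printf("after nice-19 waker: %d\n", kswapd.nice);
	set_kswapd_nice(&kswapd, &rt, 1);	/* rt waker boosts it back to -20 */
	printf("after rt waker:      %d\n", kswapd.nice);
	return 0;
}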
diff --git a/kernel-bfs-2.6.28/debian/patches/mm-make_swappiness_really_mean_it.patch b/kernel-bfs-2.6.28/debian/patches/mm-make_swappiness_really_mean_it.patch
new file mode 100644 (file)
index 0000000..16a2ac0
--- /dev/null
@@ -0,0 +1,35 @@
+The swappiness tunable lies: the vm doesn't really respect it, because the
+effective value is altered once we're more than lightly loaded. Change it to
+-really- mean swappiness unless we're about to go out of memory.
+
+-ck
+---
+ mm/vmscan.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.34-ck1/mm/vmscan.c
+===================================================================
+--- linux-2.6.34-ck1.orig/mm/vmscan.c  2010-05-18 12:24:33.974319780 +1000
++++ linux-2.6.34-ck1/mm/vmscan.c       2010-05-18 12:26:16.233444880 +1000
+@@ -1633,6 +1633,7 @@ static void shrink_zone(int priority, st
+       unsigned long nr_reclaimed = 0;
+       unsigned long percent[2];       /* anon @ 0; file @ 1 */
+       enum lru_list l;
++      int tmp_priority;
+       get_scan_ratio(zone, sc, percent);
+@@ -1648,7 +1649,11 @@ static void shrink_zone(int priority, st
+                       scan = zone_page_state(zone, NR_LRU_BASE + l);
+                       if (priority) {
+-                              scan >>= priority;
++                              tmp_priority = priority;
++
++                              if (file && priority > 0)
++                                      tmp_priority = DEF_PRIORITY;
++                              scan >>= tmp_priority;
+                               scan = (scan * percent[file]) / 100;
+                       }
+                       zone->lru[l].nr_scan += scan;
+
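For reference, the arithmetic behind the hunk above as a standalone model: the per-LRU scan target is the list size shifted right by the reclaim priority (which counts down from DEF_PRIORITY, i.e. 12, as reclaim gets more aggressive), then scaled by the anon/file split from get_scan_ratio(); with the patch the file lists keep the initial DEF_PRIORITY shift. The LRU size and percentages below are made-up illustrative values.

#include <stdio.h>

#define DEF_PRIORITY 12

static unsigned long scan_target(unsigned long lru_size, int priority,
				 int file, unsigned long percent, int patched)
{
	unsigned long scan = lru_size;

	if (priority) {
		int shift = priority;

		if (patched && file && priority > 0)
			shift = DEF_PRIORITY;	/* file lists keep the initial window */
		scan >>= shift;
		scan = scan * percent / 100;
	}
	return scan;
}

int main(void)
{
	unsigned long lru = 1UL << 20;	/* 1M pages on this LRU */

	/* reclaim under pressure: priority has escalated from 12 down to 4 */
	printf("anon            : %lu\n", scan_target(lru, 4, 0, 40, 1));
	printf("file (mainline) : %lu\n", scan_target(lru, 4, 1, 60, 0));
	printf("file (patched)  : %lu\n", scan_target(lru, 4, 1, 60, 1));
	return 0;
}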
diff --git a/kernel-bfs-2.6.28/debian/patches/sched-add-above-background-load-function.patch b/kernel-bfs-2.6.28/debian/patches/sched-add-above-background-load-function.patch
new file mode 100644 (file)
index 0000000..5163565
--- /dev/null
@@ -0,0 +1,66 @@
+Add an "above background load" function which can be used for background
+tasks elsewhere (e.g. VM).
+
+-ck
+---
+ include/linux/sched.h |    7 +++++++
+ kernel/sched_bfs.c    |   20 ++++++++++++++++++++
+ 2 files changed, 27 insertions(+)
+
+Index: linux-2.6.39-ck1/include/linux/sched.h
+===================================================================
+--- linux-2.6.39-ck1.orig/include/linux/sched.h        2011-05-19 19:36:35.115273667 +1000
++++ linux-2.6.39-ck1/include/linux/sched.h     2011-05-19 19:36:35.551273667 +1000
+@@ -1590,6 +1590,7 @@ static inline int iso_task(struct task_s
+ {
+       return (p->policy == SCHED_ISO);
+ }
++extern int above_background_load(void);
+ #else /* CFS */
+ extern int runqueue_is_locked(int cpu);
+ extern void task_rq_unlock_wait(struct task_struct *p);
+@@ -1620,6 +1621,12 @@ static inline int iso_task(struct task_s
+ {
+       return 0;
+ }
++
++/* Anyone feel like implementing this? */
++static inline int above_background_load(void)
++{
++      return 1;
++}
+ #endif
+ /*
+Index: linux-2.6.39-ck1/kernel/sched_bfs.c
+===================================================================
+--- linux-2.6.39-ck1.orig/kernel/sched_bfs.c   2011-05-19 19:36:35.121273667 +1000
++++ linux-2.6.39-ck1/kernel/sched_bfs.c        2011-05-19 19:36:35.553273667 +1000
+@@ -563,6 +563,26 @@ static inline void __task_grq_unlock(voi
+       grq_unlock();
+ }
++/*
++ * Look for any tasks *anywhere* that are running nice 0 or better. We do
++ * this lockless for overhead reasons since the occasional wrong result
++ * is harmless.
++ */
++int above_background_load(void)
++{
++      struct task_struct *cpu_curr;
++      unsigned long cpu;
++
++      for_each_online_cpu(cpu) {
++              cpu_curr = cpu_rq(cpu)->curr;
++              if (unlikely(!cpu_curr))
++                      continue;
++              if (PRIO_TO_NICE(cpu_curr->static_prio) < 1)
++                      return 1;
++      }
++      return 0;
++}
++
+ #ifndef __ARCH_WANT_UNLOCKED_CTXSW
+ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+ {
+
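A sketch of how a caller in the VM might use the new helper to defer optional background work. The helper below mirrors the logic added to sched_bfs.c; maybe_prefetch() and the fake per-CPU nice table are purely hypothetical illustrations, not code from this patch set.

#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

/* nice value of the task currently running on each CPU (stand-in for rq->curr) */
static int curr_nice[NCPUS] = { 10, 19, 5, 12 };

static bool above_background_load(void)
{
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		if (curr_nice[cpu] < 1)		/* anything at nice 0 or better? */
			return true;
	return false;
}

static void maybe_prefetch(void)		/* hypothetical background job */
{
	if (above_background_load()) {
		printf("foreground work running - defer prefetch\n");
		return;
	}
	printf("only background load - prefetch now\n");
}

int main(void)
{
	maybe_prefetch();	/* all CPUs at nice >= 5: go ahead */
	curr_nice[2] = 0;	/* a nice-0 task starts on CPU 2 */
	maybe_prefetch();	/* now defer */
	return 0;
}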
index e349195..b1faea9 100644 (file)
@@ -34,6 +34,13 @@ bfs-363-to-400.patch
 bfs-400-to-401.patch
 bfs401-penalise_fork_depth_account_threads.patch
 bfs-401-to-404.patch
+sched-add-above-background-load-function.patch
+mm-make_swappiness_really_mean_it.patch
+mm-enable_swaptoken_only_when_swap_full.patch
+mm-drop_swap_cache_aggressively.patch
+mm-kswapd_inherit_prio-1.patch
+mm-idleprio_prio-1.patch
+cpufreq-bfs_tweaks.patch
 voltage_scaling_1.diff
 voltage_scaling_0.diff
 arm-proc-v7.diff