From 129a62c4af6cc614ebb497a1077389187f2aee3a Mon Sep 17 00:00:00 2001
From: Dennis Groenen
Date: Fri, 28 Jan 2011 21:33:50 +0100
Subject: [PATCH] add anti I/O stalling patch by Adam Polkosnik (disabled by default)

---
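Note on the behaviour this backport introduces: direct reclaim now waits for
writeback only in lumpy reclaim mode, never for kswapd, and only when the pages
it isolated could not all be freed. A minimal sketch of that decision, assuming
DEF_PRIORITY == 12 and PAGE_ALLOC_COSTLY_ORDER == 3 as in this tree; the
would_stall() helper below is only an illustration and is not part of the patch:

	/* Illustration only -- not applied anywhere by this patch. */
	static int would_stall(int order, int priority, int lumpy, int is_kswapd)
	{
		if (is_kswapd || !lumpy)
			return 0;		/* kswapd and non-lumpy reclaim never wait */
		if (order > 3)			/* order > PAGE_ALLOC_COSTLY_ORDER */
			return priority <= 12;	/* i.e. always: priority runs 12..0 */
		return priority <= 4;		/* DEF_PRIORITY / 3 */
	}

In other words, order-1..3 allocations only start waiting for writeback once the
scan priority has dropped from 12 to 4, while costlier allocations wait as soon
as lumpy reclaim fails to free everything it isolated.
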
 kernel-bfs-2.6.28/debian/patches/series            |   1 +
 .../patches/vanilla-2.6.28-anti-io-stalling.patch  | 138 ++++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100644 kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch

diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series
index c7d0ab6..8af8cba 100644
--- a/kernel-bfs-2.6.28/debian/patches/series
+++ b/kernel-bfs-2.6.28/debian/patches/series
@@ -46,3 +46,4 @@ radio-bcm2048.diff
 i2c-battery.diff
 usbhostmode.diff
 bt-mice.diff
+#vanilla-2.6.28-anti-io-stalling.patch
diff --git a/kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch b/kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch
new file mode 100644
index 0000000..f66781b
--- /dev/null
+++ b/kernel-bfs-2.6.28/debian/patches/vanilla-2.6.28-anti-io-stalling.patch
@@ -0,0 +1,138 @@
+--- linux-2.6.28.orig/mm/vmscan.c	2008-12-25 00:26:37.000000000 +0100
++++ linux-2.6.28/mm/vmscan.c	2010-12-08 12:32:23.203333383 +0100
+@@ -72,6 +72,12 @@ struct scan_control {
+ 
+ 	int order;
+ 
++	/*
++	 * Intend to reclaim enough contiguous memory rather than just
++	 * enough memory, i.e. the mode used for high-order allocations.
++	 */
++	bool lumpy_reclaim_mode;
++
+ 	/* Which cgroup do we reclaim from */
+ 	struct mem_cgroup *mem_cgroup;
+ 
+@@ -1024,6 +1030,47 @@ int isolate_lru_page(struct page *page)
+ }
+ 
+ /*
++ * Returns true if the caller should wait to clean dirty/writeback pages.
++ *
++ * If we are direct reclaiming for contiguous pages and we do not reclaim
++ * everything in the list, try again and wait for writeback IO to complete.
++ * This will stall high-order allocations noticeably. Only do that when we
++ * really need to free the pages under high memory pressure.
++ */
++static inline bool should_reclaim_stall(unsigned long nr_taken,
++					unsigned long nr_freed,
++					int priority,
++					struct scan_control *sc)
++{
++	int lumpy_stall_priority;
++
++	/* kswapd should not stall on sync IO */
++	if (current_is_kswapd())
++		return false;
++
++	/* Only stall on lumpy reclaim */
++	if (!sc->lumpy_reclaim_mode)
++		return false;
++
++	/* If we have reclaimed everything on the isolated list, no stall */
++	if (nr_freed == nr_taken)
++		return false;
++
++	/*
++	 * For high-order allocations, there are two stall thresholds.
++	 * High-cost allocations stall immediately whereas lower
++	 * order allocations such as stacks require the scanning
++	 * priority to be much higher before stalling.
++	 */
++	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
++		lumpy_stall_priority = DEF_PRIORITY;
++	else
++		lumpy_stall_priority = DEF_PRIORITY / 3;
++
++	return priority <= lumpy_stall_priority;
++}
++
++/*
+  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
+  * of reclaimed pages
+  */
+@@ -1047,7 +1094,7 @@ static unsigned long shrink_inactive_lis
+ 		unsigned long nr_freed;
+ 		unsigned long nr_active;
+ 		unsigned int count[NR_LRU_LISTS] = { 0, };
+-		int mode = ISOLATE_INACTIVE;
++//		use lumpy	int mode = ISOLATE_INACTIVE;
+ 
+ 		/*
+ 		 * If we need a large contiguous chunk of memory, or have
+@@ -1056,13 +1103,11 @@ static unsigned long shrink_inactive_lis
+ 		 *
+ 		 * We use the same threshold as pageout congestion_wait below.
+ 		 */
+-		if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+-			mode = ISOLATE_BOTH;
+-		else if (sc->order && priority < DEF_PRIORITY - 2)
+-			mode = ISOLATE_BOTH;
+ 
+ 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
+-			     &page_list, &nr_scan, sc->order, mode,
++			     &page_list, &nr_scan, sc->order,
++			     sc->lumpy_reclaim_mode ?
++				ISOLATE_BOTH : ISOLATE_INACTIVE,
+ 				zone, sc->mem_cgroup, 0, file);
+ 		nr_active = clear_active_flags(&page_list, count);
+ 		__count_vm_events(PGDEACTIVATE, nr_active);
+@@ -1088,16 +1133,8 @@ static unsigned long shrink_inactive_lis
+ 		nr_scanned += nr_scan;
+ 		nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+ 
+-		/*
+-		 * If we are direct reclaiming for contiguous pages and we do
+-		 * not reclaim everything in the list, try again and wait
+-		 * for IO to complete. This will stall high-order allocations
+-		 * but that should be acceptable to the caller
+-		 */
+-		if (nr_freed < nr_taken && !current_is_kswapd() &&
+-					sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+-			congestion_wait(WRITE, HZ/10);
+-
++		/* Check if we should synchronously wait for writeback */
++		if (should_reclaim_stall(nr_taken, nr_freed, priority, sc)) {
+ 			/*
+ 			 * The attempt at page out may have made some
+ 			 * of the pages active, mark them inactive again.
+@@ -1404,6 +1441,20 @@ static void get_scan_ratio(struct zone *
+ 	percent[1] = 100 - percent[0];
+ }
+ 
++static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
++{
++	/*
++	 * If we need a large contiguous chunk of memory, or have
++	 * trouble getting a small set of contiguous pages, we
++	 * will reclaim both active and inactive pages.
++	 */
++	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
++		sc->lumpy_reclaim_mode = 1;
++	else if (sc->order && priority < DEF_PRIORITY - 2)
++		sc->lumpy_reclaim_mode = 1;
++	else
++		sc->lumpy_reclaim_mode = 0;
++}
+ 
+ /*
+  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
+@@ -1419,6 +1470,8 @@ static unsigned long shrink_zone(int pri
+ 
+ 	get_scan_ratio(zone, sc, percent);
+ 
++	set_lumpy_reclaim_mode(priority, sc);
++
+ 	for_each_evictable_lru(l) {
+ 		if (scan_global_lru(sc)) {
+ 			int file = is_file_lru(l);
-- 
1.7.9.5