vmscan: kswapd carefully call compaction

With CONFIG_COMPACTION enabled, kswapd does not try to free contiguous free pages, even when it is woken for a higher order request. This could be bad for eg. jumbo frame network allocations, which are done from interrupt context and cannot compact memory themselves. Higher than before allocation failure rates in the network receive path have been observed in kernels with compaction enabled. Teach kswapd to defragment the memory zones in a node, but only if required and compaction is not deferred in a zone. [akpm@linux-foundation.org: reduce scope of zones_need_compaction] Signed-off-by: Rik van Riel <riel@redhat.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Hillf Danton <dhillf@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Rik van Riel <riel@redhat.com> 2012-03-21 16:33:52 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-03-21 17:54:56 -0700
commit: 7be62de99adcab4449d416977b4274985c5fe023 (patch)
tree: 20ae021ec9811ad730e6a17a3530d3aa6b5027d0
parent: fe2c2a106663130a5ab45cb0e3414b52df2fff0c (diff)
download: linux-7be62de99adcab4449d416977b4274985c5fe023.tar.gz
linux-7be62de99adcab4449d416977b4274985c5fe023.tar.bz2
linux-7be62de99adcab4449d416977b4274985c5fe023.zip
3 files changed, 51 insertions, 18 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index bb2bbdbe5464..7a9323aef4a3 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -23,6 +23,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
 			bool sync);
+extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
 /* Do not skip compaction more than 64 times */
@@ -62,6 +63,11 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	return COMPACT_CONTINUE;
 }
 
+static inline int compact_pgdat(pg_data_t *pgdat, int order)
+{
+	return COMPACT_CONTINUE;
+}
+
 static inline unsigned long compaction_suitable(struct zone *zone, int order)
 {
 	return COMPACT_SKIPPED;
diff --git a/mm/compaction.c b/mm/compaction.c
index d9ebebe1a2aa..36f0f61f4a24 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -675,44 +675,61 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 
 
 /* Compact all zones within a node */
-static int compact_node(int nid)
+static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 {
 	int zoneid;
-	pg_data_t *pgdat;
 	struct zone *zone;
 
-	if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
-		return -EINVAL;
-	pgdat = NODE_DATA(nid);
-
 	/* Flush pending updates to the LRU lists */
 	lru_add_drain_all();
 
 	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
-		struct compact_control cc = {
-			.nr_freepages = 0,
-			.nr_migratepages = 0,
-			.order = -1,
-			.sync = true,
-		};
 
 		zone = &pgdat->node_zones[zoneid];
 		if (!populated_zone(zone))
 			continue;
 
-		cc.zone = zone;
-		INIT_LIST_HEAD(&cc.freepages);
-		INIT_LIST_HEAD(&cc.migratepages);
+		cc->nr_freepages = 0;
+		cc->nr_migratepages = 0;
+		cc->zone = zone;
+		INIT_LIST_HEAD(&cc->freepages);
+		INIT_LIST_HEAD(&cc->migratepages);
 
-		compact_zone(zone, &cc);
+		if (cc->order < 0 || !compaction_deferred(zone))
+			compact_zone(zone, cc);
 
-		VM_BUG_ON(!list_empty(&cc.freepages));
-		VM_BUG_ON(!list_empty(&cc.migratepages));
+		VM_BUG_ON(!list_empty(&cc->freepages));
+		VM_BUG_ON(!list_empty(&cc->migratepages));
 	}
 
 	return 0;
 }
 
+int compact_pgdat(pg_data_t *pgdat, int order)
+{
+	struct compact_control cc = {
+		.order = order,
+		.sync = false,
+	};
+
+	return __compact_pgdat(pgdat, &cc);
+}
+
+static int compact_node(int nid)
+{
+	pg_data_t *pgdat;
+	struct compact_control cc = {
+		.order = -1,
+		.sync = true,
+	};
+
+	if (nid < 0 || nid >= nr_node_ids || !node_online(nid))
+		return -EINVAL;
+	pgdat = NODE_DATA(nid);
+
+	return __compact_pgdat(pgdat, &cc);
+}
+
 /* Compact all nodes in the system */
 static int compact_nodes(void)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d7dad2a4e69c..b2b4c4a0ada2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2919,6 +2919,8 @@ out:
 	 * and it is potentially going to sleep here.
 	 */
 	if (order) {
+		int zones_need_compaction = 1;
+
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
@@ -2939,9 +2941,17 @@ out:
 				goto loop_again;
 			}
 
+			/* Check if the memory needs to be defragmented. */
+			if (zone_watermark_ok(zone, order,
+				    low_wmark_pages(zone), *classzone_idx, 0))
+				zones_need_compaction = 0;
+
 			/* If balanced, clear the congested flag */
 			zone_clear_flag(zone, ZONE_CONGESTED);
 		}
+
+		if (zones_need_compaction)
+			compact_pgdat(pgdat, order);
 	}
 
 	/*
author	Rik van Riel <riel@redhat.com>	2012-03-21 16:33:52 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-21 17:54:56 -0700
commit	7be62de99adcab4449d416977b4274985c5fe023 (patch)
tree	20ae021ec9811ad730e6a17a3530d3aa6b5027d0
parent	fe2c2a106663130a5ab45cb0e3414b52df2fff0c (diff)
download	linux-7be62de99adcab4449d416977b4274985c5fe023.tar.gz linux-7be62de99adcab4449d416977b4274985c5fe023.tar.bz2 linux-7be62de99adcab4449d416977b4274985c5fe023.zip