summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/memcontrol.h10
-rw-r--r--mm/memcontrol.c163
-rw-r--r--mm/vmscan.c62
3 files changed, 60 insertions, 175 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c416092e324..4b78661c68d0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -180,9 +180,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
mem_cgroup_update_page_stat(page, idx, -1);
}
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
- gfp_t gfp_mask,
- unsigned long *total_scanned);
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg);
void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -359,11 +357,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
}
static inline
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
- gfp_t gfp_mask,
- unsigned long *total_scanned)
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
{
- return 0;
+ return false;
}
static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fe6b9f96abdd..6c32271a31c5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2004,57 +2004,28 @@ static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
}
#endif
-static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
- struct zone *zone,
- gfp_t gfp_mask,
- unsigned long *total_scanned)
-{
- struct mem_cgroup *victim = NULL;
- int total = 0;
- int loop = 0;
- unsigned long excess;
- unsigned long nr_scanned;
- struct mem_cgroup_reclaim_cookie reclaim = {
- .zone = zone,
- .priority = 0,
- };
-
- excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
-
- while (1) {
- victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
- if (!victim) {
- loop++;
- if (loop >= 2) {
- /*
- * If we have not been able to reclaim
- * anything, it might because there are
- * no reclaimable pages under this hierarchy
- */
- if (!total)
- break;
- /*
- * We want to do more targeted reclaim.
- * excess >> 2 is not to excessive so as to
- * reclaim too much, nor too less that we keep
- * coming back to reclaim from this cgroup
- */
- if (total >= (excess >> 2) ||
- (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
- break;
- }
- continue;
- }
- if (!mem_cgroup_reclaimable(victim, false))
- continue;
- total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
- zone, &nr_scanned);
- *total_scanned += nr_scanned;
- if (!res_counter_soft_limit_excess(&root_memcg->res))
- break;
+/*
+ * A group is eligible for the soft limit reclaim if it is
+ * a) is over its soft limit
+ * b) any parent up the hierarchy is over its soft limit
+ */
+bool mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg)
+{
+ struct mem_cgroup *parent = memcg;
+
+ if (res_counter_soft_limit_excess(&memcg->res))
+ return true;
+
+ /*
+ * If any parent up the hierarchy is over its soft limit then we
+ * have to obey and reclaim from this group as well.
+ */
+ while((parent = parent_mem_cgroup(parent))) {
+ if (res_counter_soft_limit_excess(&parent->res))
+ return true;
}
- mem_cgroup_iter_break(root_memcg, victim);
- return total;
+
+ return false;
}
/*
@@ -4727,98 +4698,6 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
return ret;
}
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
- gfp_t gfp_mask,
- unsigned long *total_scanned)
-{
- unsigned long nr_reclaimed = 0;
- struct mem_cgroup_per_zone *mz, *next_mz = NULL;
- unsigned long reclaimed;
- int loop = 0;
- struct mem_cgroup_tree_per_zone *mctz;
- unsigned long long excess;
- unsigned long nr_scanned;
-
- if (order > 0)
- return 0;
-
- mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
- /*
- * This loop can run a while, specially if mem_cgroup's continuously
- * keep exceeding their soft limit and putting the system under
- * pressure
- */
- do {
- if (next_mz)
- mz = next_mz;
- else
- mz = mem_cgroup_largest_soft_limit_node(mctz);
- if (!mz)
- break;
-
- nr_scanned = 0;
- reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
- gfp_mask, &nr_scanned);
- nr_reclaimed += reclaimed;
- *total_scanned += nr_scanned;
- spin_lock(&mctz->lock);
-
- /*
- * If we failed to reclaim anything from this memory cgroup
- * it is time to move on to the next cgroup
- */
- next_mz = NULL;
- if (!reclaimed) {
- do {
- /*
- * Loop until we find yet another one.
- *
- * By the time we get the soft_limit lock
- * again, someone might have aded the
- * group back on the RB tree. Iterate to
- * make sure we get a different mem.
- * mem_cgroup_largest_soft_limit_node returns
- * NULL if no other cgroup is present on
- * the tree
- */
- next_mz =
- __mem_cgroup_largest_soft_limit_node(mctz);
- if (next_mz == mz)
- css_put(&next_mz->memcg->css);
- else /* next_mz == NULL or other memcg */
- break;
- } while (1);
- }
- __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
- excess = res_counter_soft_limit_excess(&mz->memcg->res);
- /*
- * One school of thought says that we should not add
- * back the node to the tree if reclaim returns 0.
- * But our reclaim could return 0, simply because due
- * to priority we are exposing a smaller subset of
- * memory to reclaim from. Consider this as a longer
- * term TODO.
- */
- /* If excess == 0, no tree ops */
- __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
- spin_unlock(&mctz->lock);
- css_put(&mz->memcg->css);
- loop++;
- /*
- * Could not reclaim anything and there are no more
- * mem cgroups to try or we seem to be looping without
- * reclaiming anything.
- */
- if (!nr_reclaimed &&
- (next_mz == NULL ||
- loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
- break;
- } while (!nr_reclaimed);
- if (next_mz)
- css_put(&next_mz->memcg->css);
- return nr_reclaimed;
-}
-
/**
* mem_cgroup_force_empty_list - clears LRU of a group
* @memcg: group to clear
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fe715daeb8bc..cf4643807ec2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,11 +139,21 @@ static bool global_reclaim(struct scan_control *sc)
{
return !sc->target_mem_cgroup;
}
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+ return !mem_cgroup_disabled() && global_reclaim(sc);
+}
#else
static bool global_reclaim(struct scan_control *sc)
{
return true;
}
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+ return false;
+}
#endif
unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2130,7 +2140,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
}
}
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static void
+__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
{
unsigned long nr_reclaimed, nr_scanned;
@@ -2149,6 +2160,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
do {
struct lruvec *lruvec;
+ if (soft_reclaim &&
+ !mem_cgroup_soft_reclaim_eligible(memcg)) {
+ memcg = mem_cgroup_iter(root, memcg, &reclaim);
+ continue;
+ }
+
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
shrink_lruvec(lruvec, sc);
@@ -2179,6 +2196,24 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
sc->nr_scanned - nr_scanned, sc));
}
+
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
+{
+ bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
+ unsigned long nr_scanned = sc->nr_scanned;
+
+ __shrink_zone(zone, sc, do_soft_reclaim);
+
+ /*
+ * No group is over the soft limit or those that are do not have
+ * pages in the zone we are reclaiming so we have to reclaim everybody
+ */
+ if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
+ __shrink_zone(zone, sc, false);
+ return;
+ }
+}
+
/* Returns true if compaction should go ahead for a high-order request */
static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
@@ -2240,8 +2275,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
struct zoneref *z;
struct zone *zone;
- unsigned long nr_soft_reclaimed;
- unsigned long nr_soft_scanned;
bool aborted_reclaim = false;
/*
@@ -2281,18 +2314,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
continue;
}
}
- /*
- * This steals pages from memory cgroups over softlimit
- * and returns the number of reclaimed pages and
- * scanned pages. This works for global memory pressure
- * and balancing, not for a memcg's limit.
- */
- nr_soft_scanned = 0;
- nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
- sc->order, sc->gfp_mask,
- &nr_soft_scanned);
- sc->nr_reclaimed += nr_soft_reclaimed;
- sc->nr_scanned += nr_soft_scanned;
/* need some check for avoid more shrink_zone() */
}
@@ -2880,8 +2901,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
{
int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
- unsigned long nr_soft_reclaimed;
- unsigned long nr_soft_scanned;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
@@ -2996,15 +3015,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
sc.nr_scanned = 0;
- nr_soft_scanned = 0;
- /*
- * Call soft limit reclaim before calling shrink_zone.
- */
- nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
- order, sc.gfp_mask,
- &nr_soft_scanned);
- sc.nr_reclaimed += nr_soft_reclaimed;
-
/*
* There should be no need to raise the scanning
* priority if enough pages are already being scanned