summary | refs | log | tree | commit | diff | stats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- mm/vmscan.c 55
1 files changed, 33 insertions, 22 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dfefa1d99d1b..70347d626fb3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2461,12 +2461,12 @@ out:
int file = is_file_lru(lru);
unsigned long lruvec_size;
unsigned long scan;
- unsigned long protection;
+ unsigned long min, low;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
- protection = mem_cgroup_protection(memcg);
+ mem_cgroup_protection(memcg, &min, &low);
- if (protection > 0) {
+ if (min || low) {
/*
* Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min
@@ -2481,28 +2481,38 @@ out:
* set it too low, which is not ideal.
*/
unsigned long cgroup_size = mem_cgroup_size(memcg);
- unsigned long baseline = 0;
/*
- * During the reclaim first pass, we only consider
- * cgroups in excess of their protection setting, but if
- * that doesn't produce free pages, we come back for a
- * second pass where we reclaim from all groups.
+ * If there is any protection in place, we adjust scan
+ * pressure in proportion to how much a group's current
+ * usage exceeds that, in percent.
*
- * To maintain fairness in both cases, the first pass
- * targets groups in proportion to their overage, and
- * the second pass targets groups in proportion to their
- * protection utilization.
- *
- * So on the first pass, a group whose size is 130% of
- * its protection will be targeted at 30% of its size.
- * On the second pass, a group whose size is at 40% of
- * its protection will be
- * targeted at 40% of its size.
+ * There is one special case: in the first reclaim pass,
+ * we skip over all groups that are within their low
+ * protection. If that fails to reclaim enough pages to
+ * satisfy the reclaim goal, we come back and override
+ * the best-effort low protection. However, we still
+ * ideally want to honor how well-behaved groups are in
+ * that case instead of simply punishing them all
+ * equally. As such, we reclaim them based on how much
+ * of their best-effort protection they are using. Usage
+ * below memory.min is excluded from consideration when
+ * calculating utilisation, as it isn't ever
+ * reclaimable, so it might as well not exist for our
+ * purposes.
*/
- if (!sc->memcg_low_reclaim)
- baseline = lruvec_size;
- scan = lruvec_size * cgroup_size / protection - baseline;
+ if (sc->memcg_low_reclaim && low > min) {
+ /*
+ * Reclaim according to utilisation between min
+ * and low
+ */
+ scan = lruvec_size * (cgroup_size - min) /
+ (low - min);
+ } else {
+ /* Reclaim according to protection overage */
+ scan = lruvec_size * cgroup_size /
+ max(min, low) - lruvec_size;
+ }
/*
* Don't allow the scan target to exceed the lruvec
@@ -2518,7 +2528,8 @@ out:
* some cases in the case of large overages.
*
* Also, minimally target SWAP_CLUSTER_MAX pages to keep
- * reclaim moving forwards.
+ * reclaim moving forwards, avoiding decrementing
+ * sc->priority further than desirable.
*/
scan = clamp(scan, SWAP_CLUSTER_MAX, lruvec_size);
} else {