diff options
author | Roman Gushchin <guro@fb.com> | 2018-06-07 17:07:46 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-07 17:34:36 -0700 |
commit | bf8d5d52ffe89aac5b46ddb39dd1a4351fae5df4 (patch) | |
tree | e0b0457ddf128b0562eb403762b2f2de2292e8b1 /include | |
parent | fb52bbaee598f58352d8732637ebe7013b2df79f (diff) | |
download | linux-stable-bf8d5d52ffe89aac5b46ddb39dd1a4351fae5df4.tar.gz linux-stable-bf8d5d52ffe89aac5b46ddb39dd1a4351fae5df4.tar.bz2 linux-stable-bf8d5d52ffe89aac5b46ddb39dd1a4351fae5df4.zip |
memcg: introduce memory.min
Memory controller implements the memory.low best-effort memory
protection mechanism, which works perfectly in many cases and allows
protecting working sets of important workloads from sudden reclaim.
But its semantics has a significant limitation: it works only as long as
there is a supply of reclaimable memory. This makes it pretty useless
against any sort of slow memory leaks or memory usage increases. This
is especially true for swapless systems. If swap is enabled, memory
soft protection effectively postpones problems, allowing a leaking
application to fill the entire swap area, which makes no sense. The only
effective way to guarantee the memory protection in this case is to
invoke the OOM killer.
It's possible to handle this case in userspace by reacting to MEMCG_LOW
events; but there is still a place for a fail-safe in-kernel mechanism
to provide stronger guarantees.
This patch introduces the memory.min interface for the cgroup v2 memory
controller. It works very similarly to memory.low (sharing the same
hierarchical behavior), except that it's not disabled if there is no
more reclaimable memory in the system.
If a cgroup is not populated, its memory.min is ignored, because otherwise
even the OOM killer wouldn't be able to reclaim the protected memory,
and the system can stall.
[guro@fb.com: s/low/min/ in docs]
Link: http://lkml.kernel.org/r/20180510130758.GA9129@castle.DHCP.thefacebook.com
Link: http://lkml.kernel.org/r/20180509180734.GA4856@castle.DHCP.thefacebook.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/memcontrol.h | 15 | ||||
-rw-r--r-- | include/linux/page_counter.h | 11 |
2 files changed, 20 insertions, 6 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 10d741e8fe51..9c04cf8e6487 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -58,6 +58,12 @@ enum memcg_memory_event {
 	MEMCG_NR_MEMORY_EVENTS,
 };
 
+enum mem_cgroup_protection {
+	MEMCG_PROT_NONE,
+	MEMCG_PROT_LOW,
+	MEMCG_PROT_MIN,
+};
+
 struct mem_cgroup_reclaim_cookie {
 	pg_data_t *pgdat;
 	int priority;
@@ -289,7 +295,8 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
+enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
+						struct mem_cgroup *memcg);
 
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
@@ -734,10 +741,10 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
 {
 }
 
-static inline bool mem_cgroup_low(struct mem_cgroup *root,
-				  struct mem_cgroup *memcg)
+static inline enum mem_cgroup_protection mem_cgroup_protected(
+	struct mem_cgroup *root, struct mem_cgroup *memcg)
 {
-	return false;
+	return MEMCG_PROT_NONE;
 }
 
 static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 7902a727d3b6..bab7e57f659b 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -8,10 +8,16 @@
 
 struct page_counter {
 	atomic_long_t usage;
-	unsigned long max;
+	unsigned long min;
 	unsigned long low;
+	unsigned long max;
 	struct page_counter *parent;
 
+	/* effective memory.min and memory.min usage tracking */
+	unsigned long emin;
+	atomic_long_t min_usage;
+	atomic_long_t children_min_usage;
+
 	/* effective memory.low and memory.low usage tracking */
 	unsigned long elow;
 	atomic_long_t low_usage;
@@ -47,8 +53,9 @@ bool page_counter_try_charge(struct page_counter *counter,
 			     unsigned long nr_pages,
 			     struct page_counter **fail);
 void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
-int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
+void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages);
 void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages);
+int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages);
 int page_counter_memparse(const char *buf, const char *max,
 			  unsigned long *nr_pages);
 