summary refs log tree commit diff stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 143
1 files changed, 90 insertions, 53 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 94172089f52f..3df3c04d73ab 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -170,7 +170,7 @@ struct mem_cgroup_event {
*/
poll_table pt;
wait_queue_head_t *wqh;
- wait_queue_t wait;
+ wait_queue_entry_t wait;
struct work_struct remove;
};
@@ -631,7 +631,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
val = __this_cpu_read(memcg->stat->nr_page_events);
next = __this_cpu_read(memcg->stat->targets[target]);
/* from time_after() in jiffies.h */
- if ((long)next - (long)val < 0) {
+ if ((long)(next - val) < 0) {
switch (target) {
case MEM_CGROUP_TARGET_THRESH:
next = val + THRESHOLDS_EVENTS_TARGET;
@@ -1479,10 +1479,10 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
struct oom_wait_info {
struct mem_cgroup *memcg;
- wait_queue_t wait;
+ wait_queue_entry_t wait;
};
-static int memcg_oom_wake_function(wait_queue_t *wait,
+static int memcg_oom_wake_function(wait_queue_entry_t *wait,
unsigned mode, int sync, void *arg)
{
struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
@@ -1570,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
owait.wait.flags = 0;
owait.wait.func = memcg_oom_wake_function;
owait.wait.private = current;
- INIT_LIST_HEAD(&owait.wait.task_list);
+ INIT_LIST_HEAD(&owait.wait.entry);
prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
mem_cgroup_mark_under_oom(memcg);
@@ -2376,10 +2376,9 @@ void mem_cgroup_split_huge_fixup(struct page *head)
#ifdef CONFIG_MEMCG_SWAP
static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
- bool charge)
+ int nr_entries)
{
- int val = (charge) ? 1 : -1;
- this_cpu_add(memcg->stat->count[MEMCG_SWAP], val);
+ this_cpu_add(memcg->stat->count[MEMCG_SWAP], nr_entries);
}
/**
@@ -2405,8 +2404,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
new_id = mem_cgroup_id(to);
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
- mem_cgroup_swap_statistics(from, false);
- mem_cgroup_swap_statistics(to, true);
+ mem_cgroup_swap_statistics(from, -1);
+ mem_cgroup_swap_statistics(to, 1);
return 0;
}
return -EINVAL;
@@ -3574,6 +3573,7 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
+ seq_printf(sf, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
return 0;
}
@@ -3725,7 +3725,7 @@ static void memcg_event_remove(struct work_struct *work)
*
* Called with wqh->lock held and interrupts disabled.
*/
-static int memcg_event_wake(wait_queue_t *wait, unsigned mode,
+static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode,
int sync, void *key)
{
struct mem_cgroup_event *event =
@@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
if (!pn)
return 1;
+ pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
+ if (!pn->lruvec_stat) {
+ kfree(pn);
+ return 1;
+ }
+
lruvec_init(&pn->lruvec);
pn->usage_in_excess = 0;
pn->on_tree = false;
@@ -4133,7 +4139,10 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
{
- kfree(memcg->nodeinfo[node]);
+ struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+
+ free_percpu(pn->lruvec_stat);
+ kfree(pn);
}
static void __mem_cgroup_free(struct mem_cgroup *memcg)
@@ -5165,6 +5174,7 @@ static int memory_events_show(struct seq_file *m, void *v)
seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+ seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
return 0;
}
@@ -5197,8 +5207,8 @@ static int memory_stat_show(struct seq_file *m, void *v)
seq_printf(m, "kernel_stack %llu\n",
(u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
seq_printf(m, "slab %llu\n",
- (u64)(stat[MEMCG_SLAB_RECLAIMABLE] +
- stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
+ (u64)(stat[NR_SLAB_RECLAIMABLE] +
+ stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
seq_printf(m, "sock %llu\n",
(u64)stat[MEMCG_SOCK] * PAGE_SIZE);
@@ -5222,15 +5232,25 @@ static int memory_stat_show(struct seq_file *m, void *v)
}
seq_printf(m, "slab_reclaimable %llu\n",
- (u64)stat[MEMCG_SLAB_RECLAIMABLE] * PAGE_SIZE);
+ (u64)stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
seq_printf(m, "slab_unreclaimable %llu\n",
- (u64)stat[MEMCG_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
+ (u64)stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
/* Accumulated memory events */
seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
+ seq_printf(m, "pgrefill %lu\n", events[PGREFILL]);
+ seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] +
+ events[PGSCAN_DIRECT]);
+ seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] +
+ events[PGSTEAL_DIRECT]);
+ seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]);
+ seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]);
+ seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]);
+ seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]);
+
seq_printf(m, "workingset_refault %lu\n",
stat[WORKINGSET_REFAULT]);
seq_printf(m, "workingset_activate %lu\n",
@@ -5297,38 +5317,52 @@ struct cgroup_subsys memory_cgrp_subsys = {
/**
* mem_cgroup_low - check if memory consumption is below the normal range
- * @root: the highest ancestor to consider
+ * @root: the top ancestor of the sub-tree being checked
* @memcg: the memory cgroup to check
*
* Returns %true if memory consumption of @memcg, and that of all
- * configurable ancestors up to @root, is below the normal range.
+ * ancestors up to (but not including) @root, is below the normal range.
+ *
+ * @root is exclusive; it is never low when looked at directly and isn't
+ * checked when traversing the hierarchy.
+ *
+ * Excluding @root enables using memory.low to prioritize memory usage
+ * between cgroups within a subtree of the hierarchy that is limited by
+ * memory.high or memory.max.
+ *
+ * For example, given cgroup A with children B and C:
+ *
+ * A
+ * / \
+ * B C
+ *
+ * and
+ *
+ * 1. A/memory.current > A/memory.high
+ * 2. A/B/memory.current < A/B/memory.low
+ * 3. A/C/memory.current >= A/C/memory.low
+ *
+ * As 'A' is high, i.e. triggers reclaim from 'A', and 'B' is low, we
+ * should reclaim from 'C' until 'A' is no longer high or until we can
+ * no longer reclaim from 'C'. If 'A', i.e. @root, isn't excluded by
+ * mem_cgroup_low when reclaiming from 'A', then 'B' won't be considered
+ * low and we will reclaim indiscriminately from both 'B' and 'C'.
*/
bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
{
if (mem_cgroup_disabled())
return false;
- /*
- * The toplevel group doesn't have a configurable range, so
- * it's never low when looked at directly, and it is not
- * considered an ancestor when assessing the hierarchy.
- */
-
- if (memcg == root_mem_cgroup)
- return false;
-
- if (page_counter_read(&memcg->memory) >= memcg->low)
+ if (!root)
+ root = root_mem_cgroup;
+ if (memcg == root)
return false;
- while (memcg != root) {
- memcg = parent_mem_cgroup(memcg);
-
- if (memcg == root_mem_cgroup)
- break;
-
+ for (; memcg != root; memcg = parent_mem_cgroup(memcg)) {
if (page_counter_read(&memcg->memory) >= memcg->low)
return false;
}
+
return true;
}
@@ -5445,7 +5479,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
* let's not wait for it. The page already received a
* memory+swap charge, drop the swap entry duplicate.
*/
- mem_cgroup_uncharge_swap(entry);
+ mem_cgroup_uncharge_swap(entry, nr_pages);
}
}
@@ -5873,9 +5907,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
* ancestor for the swap instead and transfer the memory+swap charge.
*/
swap_memcg = mem_cgroup_id_get_online(memcg);
- oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg), 1);
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(swap_memcg, true);
+ mem_cgroup_swap_statistics(swap_memcg, 1);
page->mem_cgroup = NULL;
@@ -5902,19 +5936,20 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
css_put(&memcg->css);
}
-/*
- * mem_cgroup_try_charge_swap - try charging a swap entry
+/**
+ * mem_cgroup_try_charge_swap - try charging swap space for a page
* @page: page being added to swap
* @entry: swap entry to charge
*
- * Try to charge @entry to the memcg that @page belongs to.
+ * Try to charge @page's memcg for the swap space at @entry.
*
* Returns 0 on success, -ENOMEM on failure.
*/
int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
{
- struct mem_cgroup *memcg;
+ unsigned int nr_pages = hpage_nr_pages(page);
struct page_counter *counter;
+ struct mem_cgroup *memcg;
unsigned short oldid;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || !do_swap_account)
@@ -5929,25 +5964,27 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
memcg = mem_cgroup_id_get_online(memcg);
if (!mem_cgroup_is_root(memcg) &&
- !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+ !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
mem_cgroup_id_put(memcg);
return -ENOMEM;
}
- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+ /* Get references for the tail pages, too */
+ if (nr_pages > 1)
+ mem_cgroup_id_get_many(memcg, nr_pages - 1);
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
VM_BUG_ON_PAGE(oldid, page);
- mem_cgroup_swap_statistics(memcg, true);
+ mem_cgroup_swap_statistics(memcg, nr_pages);
return 0;
}
/**
- * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * mem_cgroup_uncharge_swap - uncharge swap space
* @entry: swap entry to uncharge
- *
- * Drop the swap charge associated with @entry.
+ * @nr_pages: the amount of swap space to uncharge
*/
-void mem_cgroup_uncharge_swap(swp_entry_t entry)
+void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
{
struct mem_cgroup *memcg;
unsigned short id;
@@ -5955,18 +5992,18 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
if (!do_swap_account)
return;
- id = swap_cgroup_record(entry, 0);
+ id = swap_cgroup_record(entry, 0, nr_pages);
rcu_read_lock();
memcg = mem_cgroup_from_id(id);
if (memcg) {
if (!mem_cgroup_is_root(memcg)) {
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
- page_counter_uncharge(&memcg->swap, 1);
+ page_counter_uncharge(&memcg->swap, nr_pages);
else
- page_counter_uncharge(&memcg->memsw, 1);
+ page_counter_uncharge(&memcg->memsw, nr_pages);
}
- mem_cgroup_swap_statistics(memcg, false);
- mem_cgroup_id_put(memcg);
+ mem_cgroup_swap_statistics(memcg, -nr_pages);
+ mem_cgroup_id_put_many(memcg, nr_pages);
}
rcu_read_unlock();
}