diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 154 |
1 files changed, 67 insertions, 87 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4be518d4e68a..ae052b5e3315 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -921,6 +921,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) iter = mem_cgroup_iter(NULL, iter, NULL)) /** + * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy + * @memcg: hierarchy root + * @fn: function to call for each task + * @arg: argument passed to @fn + * + * This function iterates over tasks attached to @memcg or to any of its + * descendants and calls @fn for each task. If @fn returns a non-zero + * value, the function breaks the iteration loop and returns the value. + * Otherwise, it will iterate over all tasks and return 0. + * + * This function must not be called for the root memory cgroup. + */ +int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, + int (*fn)(struct task_struct *, void *), void *arg) +{ + struct mem_cgroup *iter; + int ret = 0; + + BUG_ON(memcg == root_mem_cgroup); + + for_each_mem_cgroup_tree(iter, memcg) { + struct css_task_iter it; + struct task_struct *task; + + css_task_iter_start(&iter->css, &it); + while (!ret && (task = css_task_iter_next(&it))) + ret = fn(task, arg); + css_task_iter_end(&it); + if (ret) { + mem_cgroup_iter_break(memcg, iter); + break; + } + } + return ret; +} + +/** * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page * @page: the page * @zone: zone of the page @@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg) /* * Return the memory (and swap, if configured) limit for a memcg. */ -static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) +unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) { unsigned long limit; @@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, .gfp_mask = gfp_mask, .order = order, }; - struct mem_cgroup *iter; - unsigned long chosen_points = 0; - unsigned long totalpages; - unsigned int points = 0; - struct task_struct *chosen = NULL; + bool ret; mutex_lock(&oom_lock); - - /* - * If current has a pending SIGKILL or is exiting, then automatically - * select it. The goal is to allow it to allocate so that it may - * quickly exit and free its memory. - */ - if (task_will_free_mem(current)) { - mark_oom_victim(current); - wake_oom_reaper(current); - goto unlock; - } - - check_panic_on_oom(&oc, CONSTRAINT_MEMCG); - totalpages = mem_cgroup_get_limit(memcg) ? : 1; - for_each_mem_cgroup_tree(iter, memcg) { - struct css_task_iter it; - struct task_struct *task; - - css_task_iter_start(&iter->css, &it); - while ((task = css_task_iter_next(&it))) { - switch (oom_scan_process_thread(&oc, task)) { - case OOM_SCAN_SELECT: - if (chosen) - put_task_struct(chosen); - chosen = task; - chosen_points = ULONG_MAX; - get_task_struct(chosen); - /* fall through */ - case OOM_SCAN_CONTINUE: - continue; - case OOM_SCAN_ABORT: - css_task_iter_end(&it); - mem_cgroup_iter_break(memcg, iter); - if (chosen) - put_task_struct(chosen); - /* Set a dummy value to return "true". */ - chosen = (void *) 1; - goto unlock; - case OOM_SCAN_OK: - break; - }; - points = oom_badness(task, memcg, NULL, totalpages); - if (!points || points < chosen_points) - continue; - /* Prefer thread group leaders for display purposes */ - if (points == chosen_points && - thread_group_leader(chosen)) - continue; - - if (chosen) - put_task_struct(chosen); - chosen = task; - chosen_points = points; - get_task_struct(chosen); - } - css_task_iter_end(&it); - } - - if (chosen) { - points = chosen_points * 1000 / totalpages; - oom_kill_process(&oc, chosen, points, totalpages, - "Memory cgroup out of memory"); - } -unlock: + ret = out_of_memory(&oc); mutex_unlock(&oom_lock); - return chosen; + return ret; } #if MAX_NUMNODES > 1 @@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle) if (!memcg) return false; - if (!handle || oom_killer_disabled) + if (!handle) goto cleanup; owait.memcg = memcg; @@ -2969,16 +2939,16 @@ static int memcg_update_tcp_limit(struct mem_cgroup *memcg, unsigned long limit) /* * The active flag needs to be written after the static_key * update. This is what guarantees that the socket activation - * function is the last one to run. See sock_update_memcg() for - * details, and note that we don't mark any socket as belonging - * to this memcg until that flag is up. + * function is the last one to run. See mem_cgroup_sk_alloc() + * for details, and note that we don't mark any socket as + * belonging to this memcg until that flag is up. * * We need to do this, because static_keys will span multiple * sites, but we can't control their order. If we mark a socket * as accounted, but the accounting functions are not patched in * yet, we'll lose accounting. * - * We never race with the readers in sock_update_memcg(), + * We never race with the readers in mem_cgroup_sk_alloc(), * because when this value change, the code to process it is not * patched in yet. */ @@ -4092,11 +4062,13 @@ static DEFINE_IDR(mem_cgroup_idr); static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) { + VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0); atomic_add(n, &memcg->id.ref); } static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { + VM_BUG_ON(atomic_read(&memcg->id.ref) < n); if (atomic_sub_and_test(n, &memcg->id.ref)) { idr_remove(&mem_cgroup_idr, memcg->id.id); memcg->id.id = 0; @@ -4285,8 +4257,10 @@ fail: static int mem_cgroup_css_online(struct cgroup_subsys_state *css) { + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + /* Online state pins memcg ID, memcg ID pins CSS */ - mem_cgroup_id_get(mem_cgroup_from_css(css)); + atomic_set(&memcg->id.ref, 1); css_get(css); return 0; } @@ -4434,7 +4408,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, * Because lookup_swap_cache() updates some statistics counter, * we call find_get_page() with swapper_space directly. */ - page = find_get_page(swap_address_space(ent), ent.val); + page = find_get_page(swap_address_space(ent), swp_offset(ent)); if (do_memsw_account()) entry->val = ent.val; @@ -4472,7 +4446,8 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, swp_entry_t swp = radix_to_swp_entry(page); if (do_memsw_account()) *entry = swp; - page = find_get_page(swap_address_space(swp), swp.val); + page = find_get_page(swap_address_space(swp), + swp_offset(swp)); } } else page = find_get_page(mapping, pgoff); @@ -4707,7 +4682,8 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) .mm = mm, }; down_read(&mm->mmap_sem); - walk_page_range(0, ~0UL, &mem_cgroup_count_precharge_walk); + walk_page_range(0, mm->highest_vm_end, + &mem_cgroup_count_precharge_walk); up_read(&mm->mmap_sem); precharge = mc.precharge; @@ -4995,7 +4971,8 @@ retry: * When we have consumed all precharges and failed in doing * additional charge, the page walk just aborts. */ - walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk); + walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk); + up_read(&mc.mm->mmap_sem); atomic_dec(&mc.from->moving_account); } @@ -5674,11 +5651,15 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); EXPORT_SYMBOL(memcg_sockets_enabled_key); -void sock_update_memcg(struct sock *sk) +void mem_cgroup_sk_alloc(struct sock *sk) { struct mem_cgroup *memcg; - /* Socket cloning can throw us here with sk_cgrp already + if (!mem_cgroup_sockets_enabled) + return; + + /* + * Socket cloning can throw us here with sk_memcg already * filled. It won't however, necessarily happen from * process context. So the test for root memcg given * the current task's memcg won't help us in this case. @@ -5703,12 +5684,11 @@ void sock_update_memcg(struct sock *sk) out: rcu_read_unlock(); } -EXPORT_SYMBOL(sock_update_memcg); -void sock_release_memcg(struct sock *sk) +void mem_cgroup_sk_free(struct sock *sk) { - WARN_ON(!sk->sk_memcg); - css_put(&sk->sk_memcg->css); + if (sk->sk_memcg) + css_put(&sk->sk_memcg->css); } /** |