diff options
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 151 |
1 files changed, 145 insertions, 6 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 06f7e1707847..f7ed6ece0719 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -35,6 +35,11 @@ #include <linux/freezer.h> #include <linux/ftrace.h> #include <linux/ratelimit.h> +#include <linux/kthread.h> +#include <linux/init.h> + +#include <asm/tlb.h> +#include "internal.h" #define CREATE_TRACE_POINTS #include <trace/events/oom.h> @@ -405,6 +410,133 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); bool oom_killer_disabled __read_mostly; +#ifdef CONFIG_MMU +/* + * OOM Reaper kernel thread which tries to reap the memory used by the OOM + * victim (if that is possible) to help the OOM killer to move on. + */ +static struct task_struct *oom_reaper_th; +static struct mm_struct *mm_to_reap; +static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); + +static bool __oom_reap_vmas(struct mm_struct *mm) +{ + struct mmu_gather tlb; + struct vm_area_struct *vma; + struct zap_details details = {.check_swap_entries = true, + .ignore_dirty = true}; + bool ret = true; + + /* We might have raced with exit path */ + if (!atomic_inc_not_zero(&mm->mm_users)) + return true; + + if (!down_read_trylock(&mm->mmap_sem)) { + ret = false; + goto out; + } + + tlb_gather_mmu(&tlb, mm, 0, -1); + for (vma = mm->mmap ; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + /* + * mlocked VMAs require explicit munlocking before unmap. + * Let's keep it simple here and skip such VMAs. + */ + if (vma->vm_flags & VM_LOCKED) + continue; + + /* + * Only anonymous pages have a good chance to be dropped + * without additional steps which we cannot afford as we + * are OOM already. + * + * We do not even care about fs backed pages because all + * which are reclaimable have already been reclaimed and + * we do not want to block exit_mmap by keeping mm ref + * count elevated without a good reason. + */ + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) + unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end, + &details); + } + tlb_finish_mmu(&tlb, 0, -1); + up_read(&mm->mmap_sem); +out: + mmput(mm); + return ret; +} + +static void oom_reap_vmas(struct mm_struct *mm) +{ + int attempts = 0; + + /* Retry the down_read_trylock(mmap_sem) a few times */ + while (attempts++ < 10 && !__oom_reap_vmas(mm)) + schedule_timeout_idle(HZ/10); + + /* Drop a reference taken by wake_oom_reaper */ + mmdrop(mm); +} + +static int oom_reaper(void *unused) +{ + while (true) { + struct mm_struct *mm; + + wait_event_freezable(oom_reaper_wait, + (mm = READ_ONCE(mm_to_reap))); + oom_reap_vmas(mm); + WRITE_ONCE(mm_to_reap, NULL); + } + + return 0; +} + +static void wake_oom_reaper(struct mm_struct *mm) +{ + struct mm_struct *old_mm; + + if (!oom_reaper_th) + return; + + /* + * Pin the given mm. Use mm_count instead of mm_users because + * we do not want to delay the address space tear down. + */ + atomic_inc(&mm->mm_count); + + /* + * Make sure that only a single mm is ever queued for the reaper + * because multiple are not necessary and the operation might be + * disruptive so better reduce it to the bare minimum. + */ + old_mm = cmpxchg(&mm_to_reap, NULL, mm); + if (!old_mm) + wake_up(&oom_reaper_wait); + else + mmdrop(mm); +} + +static int __init oom_init(void) +{ + oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); + if (IS_ERR(oom_reaper_th)) { + pr_err("Unable to start OOM reaper %ld. Continuing regardless\n", + PTR_ERR(oom_reaper_th)); + oom_reaper_th = NULL; + } + return 0; +} +subsys_initcall(oom_init) +#else +static void wake_oom_reaper(struct mm_struct *mm) +{ +} +#endif + /** * mark_oom_victim - mark the given task as OOM victim * @tsk: task to mark @@ -510,6 +642,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int victim_points = 0; static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); + bool can_oom_reap = true; /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -600,17 +733,23 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, continue; if (same_thread_group(p, victim)) continue; - if (unlikely(p->flags & PF_KTHREAD)) - continue; - if (is_global_init(p)) - continue; - if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) + if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p) || + p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { + /* + * We cannot use oom_reaper for the mm shared by this + * process because it wouldn't get killed and so the + * memory might be still used. + */ + can_oom_reap = false; continue; - + } do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); } rcu_read_unlock(); + if (can_oom_reap) + wake_oom_reaper(mm); + mmdrop(mm); put_task_struct(victim); } |